This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

#packages
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
pitches <- read.csv('C:\\Users\\Nick\\UCSB Baseball\\All_College_TM_19_22.csv')

Add a column that specifies how many pitches a pitcher has thrown

pitches$pitch_count <- with(pitches, ave(seq_along(paste(GameID, PitcherId)), paste(GameID, PitcherId), FUN = seq_along)) - 1

# Add a new factor column to the dataframe for the pitch group
pitches$pitch_group <- as.factor(ifelse(pitches$pitch_count < 100, (pitches$pitch_count) %/% 10 + 1, 11))

# Check the updated dataframe
head(pitches, 250)
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
1112022-02-1813:32:19.8611Kniskern, Trevor1000054486
2222022-02-1813:32:36.0012Kniskern, Trevor1000054486
3332022-02-1813:33:12.4513Kniskern, Trevor1000054486
4442022-02-1813:33:53.1721Kniskern, Trevor1000054486
5552022-02-1813:34:10.2822Kniskern, Trevor1000054486
6662022-02-1813:34:29.8023Kniskern, Trevor1000054486
7772022-02-1813:34:50.3624Kniskern, Trevor1000054486
8882022-02-1813:35:24.4225Kniskern, Trevor1000054486
9992022-02-1813:36:11.6931Kniskern, Trevor1000054486
1010102022-02-1813:36:36.9032Kniskern, Trevor1000054486

I want better names for the pitch_group levels

pitches$pitch_bin <- pitches$pitch_group

pitches$pitch_group <- NA

pitches$pitch_group[pitches$pitch_bin == '1'] <- '0-9 Pitches'
pitches$pitch_group[pitches$pitch_bin == '2'] <- '10-19 Pitches'
pitches$pitch_group[pitches$pitch_bin == '3'] <- '20-29 Pitches'
pitches$pitch_group[pitches$pitch_bin == '4'] <- '30-39 Pitches'
pitches$pitch_group[pitches$pitch_bin == '5'] <- '40-49 Pitches'
pitches$pitch_group[pitches$pitch_bin == '6'] <- '50-59 Pitches'
pitches$pitch_group[pitches$pitch_bin == '7'] <- '60-69 Pitches'
pitches$pitch_group[pitches$pitch_bin == '8'] <- '70-79 Pitches'
pitches$pitch_group[pitches$pitch_bin == '9'] <- '80-89 Pitches'
pitches$pitch_group[pitches$pitch_bin == '10'] <- '90-99 Pitches'
pitches$pitch_group[pitches$pitch_bin == '11'] <- '> 100 Pitches'

#MAke sure the order is correct. Really annoying if regression output isn't in ascending order
sqldf("SELECT pitch_group, count(*) from pitches GROUP BY pitch_group ORDER BY pitch_group")
ABCDEFGHIJ0123456789
pitch_group
<chr>
count(*)
<int>
0-9 Pitches297797
10-19 Pitches240134
20-29 Pitches170913
30-39 Pitches121986
40-49 Pitches90307
50-59 Pitches69952
60-69 Pitches55977
70-79 Pitches42704
80-89 Pitches29769
90-99 Pitches16710

Check for mistakes. Sometimes the trackman doesn’t change pitcherId when a new pitcher comes in.

library(sqldf)
library(dplyr)
sqldf("SELECT pitch_count, count(*) from pitches GROUP BY pitch_count")
ABCDEFGHIJ0123456789
pitch_count
<dbl>
count(*)
<int>
030862
130771
230656
330451
430168
529814
629431
729042
828564
928038
pitches[pitches$pitch_count > 120, ] %>% arrange(GameID)
ABCDEFGHIJ0123456789
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
8696442482019-03-299:42:46 PM34Lunn, Connor8899825
8696452492019-03-299:43:26 PM41Lunn, Connor8899825
8696462502019-03-299:43:56 PM42Lunn, Connor8899825
8696472512019-03-299:44:36 PM43Lunn, Connor8899825
8692752122019-03-298:57:59 PM75Bibee, Tanner1000018470
8800402492019-03-306:37:14 PM55Koski, Jon1000030130
8800412502019-03-306:37:35 PM56Koski, Jon1000030130
8800722812019-03-306:55:05 PM11Koski, Jon1000030130
8800732822019-03-306:55:17 PM12Koski, Jon1000030130
8800742832019-03-306:55:31 PM13Koski, Jon1000030130

Connor Lunn Seems incorrect. USC stats show Charles Acker went 8.2 in that game though. Probably just entirely incorrect. https://usctrojans.com/sports/baseball/stats/2019/ucla/boxscore/22237

Tanner Bibee seems CORRECT. HE went 6 inning on that day with 7 ER Koski, Jon is correct. Went 6.1 inning Coates, Chandler is CORRECT. Went 8 innings

Jordan MArks is not correct. MArks only went 6 (105 pitches). Will Wheeler went the last 3 https://upstatespartans.com/sports/baseball/stats/2019/radford/boxscore/630

Max MEyer is correct Went 7 innings 123 pitches that game Kade Strowd is correct Charles HAll is correct 134 pitches Jordan wicks is correct 9 IP 129 pitches JAke AGnos is correct. 129 MAx Meyer 4-26 correct 125 Alek manoah 124, 126 on 5/23 Bryce Elder correct 122 Tanner Bibee correct again 9IP 133 Zack thompson is correct 6.1IP

Kyle Murphy Incorrect. only went 2 innings. Stiehl and josh winkler included in his count https://nuhuskies.com/sports/baseball/stats/2020/alabama/boxscore/10415

Peyton W is correct

Chandler Fochs is weird. THis dataset says his 123rd pitch was in the 4th inning. Brian Rumping took over after 56 from fochs. Definitely check out this game. 20220306-UNCCharlotte-1 https://goleathernecks.com/boxscore.aspx?id=12858&path=baseball

Andrew PAtrick is fine 122 JEff Wilson is fine 9IP 129

The NA’s (GameID 20220318-Lipscomb-1 ) Isaiah Magwood started went 102, reid fagerstrom went 25, trevor andrews 14

miles smith is fine Isaiah coupet is fine 124

Julien hernandez did go 9, said he threw 118 though instead of 122 according to trackman (Maybe it counted warmup pitches?) D’Alessio, Andrew 6.0IP, 120 pitches instead of 125

John Michael Bertrand FINE Cole larsen 9IP. FINE 6.2IP 122 on 4-23 Justin PArker. Assumed FIne Fischer Paulsen. Assumed fine. 7.2IP Ivan MArtinez 6.1 IP 32 batters. Looks fine Jack perkins 128 FINE DAniel Hegarty FINE Joshua South. Assumed Fine

Riley Egloff. ISSUE. He pitched 6 inning 112 pitches. Might include warm up throws or something. https://golobos.com/boxscore/nevada-12/

Paul Skenes 8IP 123 pitches so slightly off. Trackman has 3 or 4 too many Peyton Wiggington 9IP 128 so trackman has two less

Joshua South (5/14) is correct cam reeves assumed correct Taylor GRant correct

Kirschsieper, Cole only threw 98 pitches. Ty Rybarczy came in for 23 and then Alex Vera finished with 51 https://fightingillini.com/sports/baseball/stats/2022/penn-state/boxscore/23558

JAcob cravey only went 6.2. Alex Goff Came in for the last 2.1 https://samfordsports.com/boxscore.aspx?path=baseball&id=12292

Tomasic, Connor only went 6.1 (100 pitches), Kyle Bischoff did the last 2.2 (40 pitches) https://bigten.org/boxscore.aspx?id=jhOYpxOXr63Fu7gkOpr76ZNub%2B51dDNCK2IY59m%2BpdLLZoN7nnqBYyrtcwLzCyhT71VLOajDDnc2bi%2BWgpv7bhQbjnwVxvuPMdtW9hhk%2Bk%2FeQOeE6RumhMwny5z6HwOz&path=baseball

Tyler stultz (5-20) is off by 1 but whatever. (5-26) is correct

Nick Dean only went five inning (87 pitches) Nigel Belgrave did 6 and 7. TOgether they threw 131 so theres ten missing or possibly correctly assigned to Belgrave 20220520-PurdueUniversity-1 https://umterps.com/sports/baseball/stats/2022/purdue/boxscore/12805

Why are most 120+ pitch outings in 2019 and 2022. Are there less pitches in general in 2020 and 2021 because of covid or something?

sqldf("SELECT SUBSTRING(Date, 1, 4) as year, count(*), pitch_group FROM pitches group by SUBSTRING(Date, 1, 4), pitch_group ORDER BY SUBSTRING(Date, 1, 4), pitch_group")
ABCDEFGHIJ0123456789
year
<chr>
count(*)
<int>
pitch_group
<chr>
80-9 Pitches
710-19 Pitches
1020-29 Pitches
330-39 Pitches
140-49 Pitches
350-59 Pitches
160-69 Pitches
380-89 Pitches
6> 100 Pitches
2019524190-9 Pitches
sqldf("SELECT SUBSTRING(Date, 1, 4) as year, count(*) FROM pitches group by SUBSTRING(Date, 1, 4) ORDER BY SUBSTRING(Date, 1, 4)")
ABCDEFGHIJ0123456789
year
<chr>
count(*)
<int>
42
2019206946
2020161249
20218537
2022767681

Looks like theres harldy any in 2021. We’ll keep this in mind.

##Fix Mistakes FOund above. Since were fixing stuff, don’t forget to redo the second and third code chunks to get accurate pitch_count and pitch_group columns

Fix conner Lunn’s 3-29-2019 outing. All 125 pitches assigned to him were thrown by Charles Acker. Actually, Charles Acker was in high school in 2019. It seems like the mistake might be in the Box Score on the USC website. Lunn and Acker were both number 35. I’ll come back to this

# Find Charles Ackers PitcherId
pitches[pitches$PitcherTeam == 'USC_UPS' & substr(pitches$Date, 1, 4) == '2019', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
905297905297122019-04-056:09:18 PM11Marks, Jordan1000025130
905298905298132019-04-056:09:53 PM12Marks, Jordan1000025130
905299905299142019-04-056:10:10 PM13Marks, Jordan1000025130
905300905300152019-04-056:10:51 PM14Marks, Jordan1000025130
905301905301162019-04-056:11:07 PM15Marks, Jordan1000025130
905302905302172019-04-056:11:37 PM21Marks, Jordan1000025130
905303905303182019-04-056:11:52 PM22Marks, Jordan1000025130
905304905304192019-04-056:12:07 PM23Marks, Jordan1000025130
905305905305202019-04-056:12:25 PM24Marks, Jordan1000025130
905306905306212019-04-056:13:16 PM31Marks, Jordan1000025130

Jordan MArks has 141 pitches in the 4-7-19 game but he only threw 105. We need to give the last 36 to Will Wheeler and reset the pitch count. GameID: 20190405-CarterMemorial-1

#Find Will Wheelers PitcherId
pitches[pitches$Pitcher == 'Wheeler, Will', ]
ABCDEFGHIJ0123456789
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
PitcherThrows
<chr>
PitcherTeam
<chr>
pitches[pitches$PitcherTeam == 'USC_UPS' & substr(pitches$Date, 1, 4) == '2019', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
905297905297122019-04-056:09:18 PM11Marks, Jordan1000025130
905298905298132019-04-056:09:53 PM12Marks, Jordan1000025130
905299905299142019-04-056:10:10 PM13Marks, Jordan1000025130
905300905300152019-04-056:10:51 PM14Marks, Jordan1000025130
905301905301162019-04-056:11:07 PM15Marks, Jordan1000025130
905302905302172019-04-056:11:37 PM21Marks, Jordan1000025130
905303905303182019-04-056:11:52 PM22Marks, Jordan1000025130
905304905304192019-04-056:12:07 PM23Marks, Jordan1000025130
905305905305202019-04-056:12:25 PM24Marks, Jordan1000025130
905306905306212019-04-056:13:16 PM31Marks, Jordan1000025130
#No games for will wheeeler, I'll create a new pitcherId for him. First I'll make sure it's not being used
pitches[(!(is.na(pitches$PitcherId)) & pitches$PitcherId == 1000025131), ]
ABCDEFGHIJ0123456789
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
PitcherThrows
<chr>
PitcherTeam
<chr>
#1000025131 is good

#Replace PitcherId for GameID, pitch_count, pitcherTeam 105through 141
#Check entire rows first
pitches[pitches$GameID == '20190405-CarterMemorial-1' & pitches$PitcherTeam == 'USC_UPS' & pitches$pitch_count >= 104, ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
9055199055192342019-04-058:18:24 PM11Marks, Jordan1000025130
9055209055202352019-04-058:18:37 PM12Marks, Jordan1000025130
9055219055212362019-04-058:18:54 PM13Marks, Jordan1000025130
9055229055222372019-04-058:19:10 PM14Marks, Jordan1000025130
9055239055232382019-04-058:19:26 PM15Marks, Jordan1000025130
9055249055242392019-04-058:20:05 PM21Marks, Jordan1000025130
9055259055252402019-04-058:20:21 PM22Marks, Jordan1000025130
9055269055262412019-04-058:20:36 PM23Marks, Jordan1000025130
9055279055272422019-04-058:21:12 PM31Marks, Jordan1000025130
9055289055282432019-04-058:21:29 PM32Marks, Jordan1000025130
#Now Replace
pitches$PitcherId[pitches$GameID == '20190405-CarterMemorial-1' & pitches$PitcherTeam == 'USC_UPS' & pitches$pitch_count >= 104] <- 1000025131
#Replace Name too Why not
pitches$Pitcher[pitches$GameID == '20190405-CarterMemorial-1' & pitches$PitcherTeam == 'USC_UPS' & pitches$pitch_count >= 104] <- 'Wheeler, Will'

Kyle Murphy’s 153 pitches we’re actually 70 for him, 69 for Stiehl, David and hen 16 for Winkler, Josh. GAmeID 20200214-SwellThomasStadium-1

Quite a discrepency between box score and trackman. I’g going to say Murphy got pulled after the 2nd batter in the 3rd inning. Pitch count 68 is first pitch for Stiehl, DAvid. He goes until end of 6th. Josh Winkler starts the sevent at pitch count 137 until the end

# Look at all pitches for this game
pitches[pitches$GameID == '20200214-SwellThomasStadium-1' & pitches$PitcherTeam == 'NOR_HUS', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
980642980642102020-02-143:06:26 PM11Murphy, Kyle8892851
980643980643112020-02-143:06:44 PM12Murphy, Kyle8892851
980644980644122020-02-143:07:27 PM21Murphy, Kyle8892851
980645980645132020-02-143:07:40 PM22Murphy, Kyle8892851
980646980646142020-02-143:08:01 PM23Murphy, Kyle8892851
980647980647152020-02-143:08:31 PM31Murphy, Kyle8892851
980648980648162020-02-143:09:11 PM32Murphy, Kyle8892851
980649980649172020-02-143:09:45 PM33Murphy, Kyle8892851
980650980650182020-02-143:10:08 PM34Murphy, Kyle8892851
980651980651192020-02-143:10:31 PM35Murphy, Kyle8892851
#Getting PitcherIds for stiehl and Winkler
pitches[pitches$Pitcher == 'Stiehl, David', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
78498078498012019-03-019:59:43 AM11Stiehl, David673924
78498178498122019-03-019:59:59 AM12Stiehl, David673924
78498278498232019-03-0110:00:15 AM13Stiehl, David673924
78498378498342019-03-0110:00:33 AM14Stiehl, David673924
78498478498452019-03-0110:00:50 AM15Stiehl, David673924
78498578498562019-03-0110:01:49 AM21Stiehl, David673924
78498678498672019-03-0110:02:18 AM22Stiehl, David673924
78498778498782019-03-0110:03:24 AM23Stiehl, David673924
78498878498892019-03-0110:03:43 AM24Stiehl, David673924
784989784989102019-03-0110:04:02 AM25Stiehl, David673924
#673924
pitches[pitches$Pitcher == 'Winkler, Josh', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
7852027852022232019-03-0112:08:21 PM41Winkler, Josh1000025808
7852037852032242019-03-0112:08:59 PM51Winkler, Josh1000025808
7852047852042252019-03-0112:09:21 PM52Winkler, Josh1000025808
7852057852052262019-03-0112:09:42 PM53Winkler, Josh1000025808
7852067852062272019-03-0112:10:08 PM54Winkler, Josh1000025808
7852077852072282019-03-0112:10:31 PM55Winkler, Josh1000025808
7852087852082292019-03-0112:11:02 PM56Winkler, Josh1000025808
7852097852092302019-03-0112:11:34 PM61Winkler, Josh1000025808
7852107852102312019-03-0112:11:54 PM62Winkler, Josh1000025808
7852117852112322019-03-0112:12:27 PM63Winkler, Josh1000025808
#1000025808



#Replace Stiehls pitches' PitcherId for GameID, pitch_count, pitcherTeam 68 through 137
#Check entire rows first
pitches[pitches$GameID == '20200214-SwellThomasStadium-1' & pitches$PitcherTeam == 'NOR_HUS' & pitches$pitch_count >= 68 & pitches$pitch_count <= 136, ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
9807409807401082020-02-143:57:01 PM31Murphy, Kyle8892851
9807419807411092020-02-143:57:23 PM32Murphy, Kyle8892851
9807429807421102020-02-143:57:49 PM33Murphy, Kyle8892851
9807439807431112020-02-143:58:22 PM34Murphy, Kyle8892851
9807449807441122020-02-143:58:46 PM35Murphy, Kyle8892851
9807459807451132020-02-143:59:40 PM41Murphy, Kyle8892851
9807469807461142020-02-144:00:11 PM51Murphy, Kyle8892851
9807479807471152020-02-144:00:27 PM52Murphy, Kyle8892851
9807489807481162020-02-144:00:45 PM53Murphy, Kyle8892851
9807499807491172020-02-144:01:04 PM54Murphy, Kyle8892851
#Now Replace
pitches$PitcherId[pitches$GameID == '20200214-SwellThomasStadium-1' & pitches$PitcherTeam == 'NOR_HUS' & pitches$pitch_count >= 68 & pitches$pitch_count <= 136] <- 673924
#Replace Name too Why not
pitches$Pitcher[pitches$GameID == '20200214-SwellThomasStadium-1' & pitches$PitcherTeam == 'NOR_HUS' & pitches$pitch_count >= 68 & pitches$pitch_count <= 136] <- 'Stiehl, David'


#Replace Winklers pitches' PitcherId for GameID, pitch_count, pitcherTeam 68 through 137
#Check entire rows first
pitches[pitches$GameID == '20200214-SwellThomasStadium-1' & pitches$PitcherTeam == 'NOR_HUS' & pitches$pitch_count >= 137, ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
9808859808852532020-02-145:14:12 PM11Murphy, Kyle8892851
9808869808862542020-02-145:14:44 PM21Murphy, Kyle8892851
9808879808872552020-02-145:15:03 PM22Murphy, Kyle8892851
9808889808882562020-02-145:15:22 PM23Murphy, Kyle8892851
9808899808892572020-02-145:15:49 PM24Murphy, Kyle8892851
9808909808902582020-02-145:16:23 PM25Murphy, Kyle8892851
9808919808912592020-02-145:16:49 PM26Murphy, Kyle8892851
9808929808922602020-02-145:17:26 PM31Murphy, Kyle8892851
9808939808932612020-02-145:17:47 PM32Murphy, Kyle8892851
9808949808942622020-02-145:18:11 PM33Murphy, Kyle8892851
#Now Replace
pitches$PitcherId[pitches$GameID == '20200214-SwellThomasStadium-1' & pitches$PitcherTeam == 'NOR_HUS' & pitches$pitch_count >= 137] <- 1000025808
#Replace Name too Why not
pitches$Pitcher[pitches$GameID == '20200214-SwellThomasStadium-1' & pitches$PitcherTeam == 'NOR_HUS' & pitches$pitch_count >= 137] <- 'Winkler, Josh'

Fix Chandler Fochs, GAmeID: 20220306-UNCCharlotte-1 Chandler: 2.0IP, 56 pitches, Rumping, Bryan 1.2IP, 67 pitches; Jaynes, Will 1.0IP 47 pirches; Kratz, Caden 1.1, 27

CHandler pulled after 56. Give Pitch_count 57 through upped bound to Rumping, Bryan

# Look at all pitches for this game
pitches[pitches$GameID == '20220306-UNCCharlotte-1' & pitches$PitcherTeam == 'WIU_LEA', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
144847144847232022-03-0612:13:21.4911Fochs, Chandler1000057813
144848144848242022-03-0612:13:40.8612Fochs, Chandler1000057813
144849144849252022-03-0612:13:55.4813Fochs, Chandler1000057813
144850144850262022-03-0612:14:10.0214Fochs, Chandler1000057813
144851144851272022-03-0612:14:31.3815Fochs, Chandler1000057813
144852144852282022-03-0612:14:49.0116Fochs, Chandler1000057813
144853144853292022-03-0612:15:29.4221Fochs, Chandler1000057813
144854144854302022-03-0612:15:44.7722Fochs, Chandler1000057813
144855144855312022-03-0612:16:05.4523Fochs, Chandler1000057813
144856144856322022-03-0612:16:22.7024Fochs, Chandler1000057813
#Find Rumping's pitcherId
pitches[pitches$Pitcher == 'Rumping, Bryan', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
6062376062372682022-05-0320:35:12.7811Rumping, Bryan1000076881
6062386062382692022-05-0320:35:24.8212Rumping, Bryan1000076881
6062396062392702022-05-0320:35:40.4913Rumping, Bryan1000076881
6062406062402712022-05-0320:36:34.3721Rumping, Bryan1000076881
6062416062412722022-05-0320:37:35.2631Rumping, Bryan1000076881
6062426062422732022-05-0320:38:15.3541Rumping, Bryan1000076881
6062436062432742022-05-0320:38:27.4342Rumping, Bryan1000076881
6062446062442752022-05-0320:38:40.0843Rumping, Bryan1000076881
#1000076881

#Replace PitcherId for GameID, pitch_count, pitcherTeam 57 and up
#Check entire rows first
pitches[pitches$GameID == '20220306-UNCCharlotte-1' & pitches$PitcherTeam == 'WIU_LEA' & pitches$pitch_count >= 57, ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
144918144918942022-03-0612:54:27.9411Fochs, Chandler1000057813
144919144919952022-03-0612:54:42.8612Fochs, Chandler1000057813
144920144920962022-03-0612:54:59.2513Fochs, Chandler1000057813
144921144921972022-03-0612:55:18.3514Fochs, Chandler1000057813
144922144922982022-03-0612:55:48.1115Fochs, Chandler1000057813
144923144923992022-03-0612:56:04.3616Fochs, Chandler1000057813
1449241449241002022-03-0612:56:38.9921Fochs, Chandler1000057813
1449251449251012022-03-0612:56:56.7922Fochs, Chandler1000057813
1449261449261022022-03-0612:57:15.1423Fochs, Chandler1000057813
1449271449271032022-03-0612:57:36.1824Fochs, Chandler1000057813
#Now Replace
pitches$PitcherId[pitches$GameID == '20220306-UNCCharlotte-1' & pitches$PitcherTeam == 'WIU_LEA' & pitches$pitch_count >= 57] <- 1000076881
#Replace Name too Why not
pitches$Pitcher[pitches$GameID == '20220306-UNCCharlotte-1' & pitches$PitcherTeam == 'WIU_LEA' & pitches$pitch_count >= 57] <- 'Rumping, Bryan'

Fix the NA pitcher Ids for this game 20220318-Lipscomb-1

# Look at all pitches for this game
pitches[pitches$GameID == '20220318-Lipscomb-1' & pitches$PitcherTeam == 'LIP_PRA', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
PitcherThrows
<chr>
22408822408812022-03-1815:04:22.7611NARight
22408922408922022-03-1815:04:43.9312NARight
22409022409032022-03-1815:05:02.3913NARight
22409122409142022-03-1815:05:34.8921NARight
22409222409252022-03-1815:05:47.5122NARight
22409322409362022-03-1815:06:08.2123NARight
22409422409472022-03-1815:06:31.7224NARight
22409522409582022-03-1815:06:53.4125NARight
22409622409692022-03-1815:07:09.5226NARight
224097224097102022-03-1815:07:48.7531NARight
#Getting PitcherIds for stiehl and Winkler
pitches[pitches$Pitcher == 'Kantola, Kaleb', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
5157515712022-02-1814:06:25.2311Kantola, Kaleb1000094167
5158515822022-02-1814:06:42.1712Kantola, Kaleb1000094167
5159515932022-02-1814:07:04.9213Kantola, Kaleb1000094167
5160516042022-02-1814:07:22.5914Kantola, Kaleb1000094167
5161516152022-02-1814:07:49.5815Kantola, Kaleb1000094167
5162516262022-02-1814:08:15.7316Kantola, Kaleb1000094167
5163516372022-02-1814:08:37.8617Kantola, Kaleb1000094167
5164516482022-02-1814:09:17.7321Kantola, Kaleb1000094167
5165516592022-02-1814:09:36.6122Kantola, Kaleb1000094167
51665166102022-02-1814:10:02.6223Kantola, Kaleb1000094167
#1000094167
pitches[pitches$Pitcher == 'Williams, Patrick', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
96294962941082022-03-0216:11:01.9451Williams, Patrick1000085191
96295962951092022-03-0216:11:18.7652Williams, Patrick1000085191
96296962961102022-03-0216:11:43.4153Williams, Patrick1000085191
96297962971112022-03-0216:12:03.1854Williams, Patrick1000085191
96298962981122022-03-0216:12:23.5155Williams, Patrick1000085191
96306963061202022-03-0216:19:24.9011Williams, Patrick1000085191
96307963071212022-03-0216:19:37.7612Williams, Patrick1000085191
96308963081222022-03-0216:20:16.0021Williams, Patrick1000085191
96309963091232022-03-0216:20:46.7531Williams, Patrick1000085191
96330963301442022-03-0216:32:19.5711Williams, Patrick1000085191
#1000085191
pitches[pitches$Pitcher == 'Newell, Will', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
18121181212982022-02-1917:04:37.4611Newell, Will1000106568
18122181222992022-02-1917:04:53.9212Newell, Will1000106568
18123181233002022-02-1917:05:15.8813Newell, Will1000106568
18124181243012022-02-1917:05:47.5121Newell, Will1000106568
18125181253022022-02-1917:06:01.6922Newell, Will1000106568
18126181263032022-02-1917:06:23.0023Newell, Will1000106568
18127181273042022-02-1917:06:51.5624Newell, Will1000106568
18128181283052022-02-1917:07:27.3531Newell, Will1000106568
18129181293062022-02-1917:07:43.5032Newell, Will1000106568
18130181303072022-02-1917:08:00.2233Newell, Will1000106568
#1000106568




#Give first 68 piches to Kantola, Kaleb
#Check entire rows first
pitches[pitches$GameID == '20220318-Lipscomb-1' & pitches$PitcherTeam == 'LIP_PRA' & pitches$pitch_count <= 68, ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
PitcherThrows
<chr>
22408822408812022-03-1815:04:22.7611NARight
22408922408922022-03-1815:04:43.9312NARight
22409022409032022-03-1815:05:02.3913NARight
22409122409142022-03-1815:05:34.8921NARight
22409222409252022-03-1815:05:47.5122NARight
22409322409362022-03-1815:06:08.2123NARight
22409422409472022-03-1815:06:31.7224NARight
22409522409582022-03-1815:06:53.4125NARight
22409622409692022-03-1815:07:09.5226NARight
224097224097102022-03-1815:07:48.7531NARight
#Now Replace
pitches$PitcherId[pitches$GameID == '20220318-Lipscomb-1' & pitches$PitcherTeam == 'LIP_PRA' & pitches$pitch_count <= 68] <- 1000094167
#Replace Name too Why not
pitches$Pitcher[pitches$GameID == '20220318-Lipscomb-1' & pitches$PitcherTeam == 'LIP_PRA' & pitches$pitch_count <= 68] <- 'Kantola, Kaleb'


#Replace NA pitches' PitcherId for GameID, pitch_count, pitcherTeam 69 through 99
#Check entire rows first
pitches[pitches$GameID == '20220318-Lipscomb-1' & pitches$PitcherTeam == 'LIP_PRA' & pitches$pitch_count > 68 & pitches$pitch_count <= 99, ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
PitcherThrows
<chr>
2242182242181312022-03-1816:12:36.4711NARight
2242192242191322022-03-1816:12:49.0912NARight
2242202242201332022-03-1816:13:08.1213NARight
2242212242211342022-03-1816:13:20.8114NARight
2242222242221352022-03-1816:13:38.4415NARight
2242232242231362022-03-1816:13:54.8216NARight
2242242242241372022-03-1816:14:35.4521NARight
2242252242251382022-03-1816:14:54.9822NARight
2242262242261392022-03-1816:15:16.9423NARight
2242272242271402022-03-1816:15:38.7224NARight
#Now Replace
pitches$PitcherId[pitches$GameID == '20220318-Lipscomb-1' & pitches$PitcherTeam == 'LIP_PRA' & pitches$pitch_count > 68 & pitches$pitch_count <= 99] <- 1000085191
#Replace Name too Why not
pitches$Pitcher[pitches$GameID == '20220318-Lipscomb-1' & pitches$PitcherTeam == 'LIP_PRA' & pitches$pitch_count > 68 & pitches$pitch_count <= 99] <- 'Williams, Patrick'

#Replace last 50 or so pitcher ids with Newells
#Check entire rows first
pitches[pitches$GameID == '20220318-Lipscomb-1' & pitches$PitcherTeam == 'LIP_PRA' & pitches$pitch_count >= 100, ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
PitcherThrows
<chr>
2242902242902032022-03-1816:51:19.7811NARight
2242912242912042022-03-1816:51:31.9412NARight
2242922242922052022-03-1816:51:50.7513NARight
2242932242932062022-03-1816:52:04.2714NARight
2242942242942072022-03-1816:52:53.7621NARight
2242952242952082022-03-1816:54:51.0831NARight
2242962242962092022-03-1816:55:44.1241NARight
2242972242972102022-03-1816:56:03.6342NARight
2242982242982112022-03-1816:56:27.6143NARight
2242992242992122022-03-1816:56:52.8751NARight
#Now Replace
pitches$PitcherId[pitches$GameID == '20220318-Lipscomb-1' & pitches$PitcherTeam == 'LIP_PRA' & pitches$pitch_count >= 100] <- 1000106568
#Replace Name too Why not
pitches$Pitcher[pitches$GameID == '20220318-Lipscomb-1' & pitches$PitcherTeam == 'LIP_PRA' & pitches$pitch_count >= 100] <- 'Newell, Will'

20220519-LubranoPark-1. COle got given way too many. 2 other guys came in. Ty Rybarczy came in for 23 and then Alex Vera finished with 51. Only 92 for cole

# Look at all pitches for this game
pitches[pitches$GameID == '20220519-LubranoPark-1' & pitches$PitcherTeam == 'ILL_ILL', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
726427726427212022-05-1917:14:27.8011Kirschsieper, Cole1000050785
726428726428222022-05-1917:14:40.7412Kirschsieper, Cole1000050785
726429726429232022-05-1917:14:53.5813Kirschsieper, Cole1000050785
726430726430242022-05-1917:15:48.3721Kirschsieper, Cole1000050785
726431726431252022-05-1917:16:12.5922Kirschsieper, Cole1000050785
726432726432262022-05-1917:16:43.7123Kirschsieper, Cole1000050785
726433726433272022-05-1917:17:17.4724Kirschsieper, Cole1000050785
726434726434282022-05-1917:18:05.8931Kirschsieper, Cole1000050785
726435726435292022-05-1917:18:38.3232Kirschsieper, Cole1000050785
726436726436302022-05-1917:19:31.1341Kirschsieper, Cole1000050785
#Ty Rybarczy id
pitches[pitches$Pitcher == 'Rybarczyk, Ty', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
3431253431252182022-04-0119:59:11.9011Rybarczyk, Ty1000084463
3431263431262192022-04-0119:59:32.1412Rybarczyk, Ty1000084463
3431273431272202022-04-0119:59:49.7913Rybarczyk, Ty1000084463
3431283431282212022-04-0120:00:13.3214Rybarczyk, Ty1000084463
3431293431292222022-04-0120:00:37.1715Rybarczyk, Ty1000084463
3431303431302232022-04-0120:01:09.9621Rybarczyk, Ty1000084463
3431313431312242022-04-0120:02:20.8631Rybarczyk, Ty1000084463
3431323431322252022-04-0120:02:46.9932Rybarczyk, Ty1000084463
3431333431332262022-04-0120:03:09.9833Rybarczyk, Ty1000084463
3431343431342272022-04-0120:03:55.9641Rybarczyk, Ty1000084463
#1000084463
#Alex Vera ID
pitches[pitches$Pitcher == 'Vera, Alex', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
6754067540832022-02-2619:31:11.5961Vera, Alex1000050778
6754167541842022-02-2619:31:32.4462Vera, Alex1000050778
6754267542852022-02-2619:31:53.8263Vera, Alex1000050778
6754367543862022-02-2619:32:13.4364Vera, Alex1000050778
6754467544872022-02-2619:32:33.9365Vera, Alex1000050778
6754567545882022-02-2619:32:57.0966Vera, Alex1000050778
6754667546892022-02-2619:33:46.2771Vera, Alex1000050778
6754767547902022-02-2619:34:06.9372Vera, Alex1000050778
6754867548912022-02-2619:34:29.9073Vera, Alex1000050778
6754967549922022-02-2619:34:50.4874Vera, Alex1000050778
#1000050778

#GIve TY all the pitches starting at pitch_count 94 up to 117
#Check entire rows first
pitches[pitches$GameID == '20220519-LubranoPark-1' & pitches$PitcherTeam == 'ILL_ILL' & pitches$pitch_count >= 94 & pitches$pitch_count <= 117, ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
7266037266031972022-05-1918:55:32.9591Kirschsieper, Cole1000050785
7266047266041982022-05-1918:56:23.32101Kirschsieper, Cole1000050785
7266057266051992022-05-1918:56:46.96102Kirschsieper, Cole1000050785
7266067266062002022-05-1918:57:08.76103Kirschsieper, Cole1000050785
7266077266072012022-05-1918:57:29.42104Kirschsieper, Cole1000050785
7266087266082022022-05-1918:57:58.24105Kirschsieper, Cole1000050785
7266247266242182022-05-1919:08:11.3411Kirschsieper, Cole1000050785
7266257266252192022-05-1919:08:31.4112Kirschsieper, Cole1000050785
7266267266262202022-05-1919:09:05.1721Kirschsieper, Cole1000050785
7266277266272212022-05-1919:09:22.5422Kirschsieper, Cole1000050785
#Now Replace
pitches$PitcherId[pitches$GameID == '20220519-LubranoPark-1' & pitches$PitcherTeam == 'ILL_ILL' & pitches$pitch_count >= 94 & pitches$pitch_count <= 117] <- 1000084463
#Replace Name too Why not
pitches$Pitcher[pitches$GameID == '20220519-LubranoPark-1' & pitches$PitcherTeam == 'ILL_ILL' & pitches$pitch_count >= 94 & pitches$pitch_count <= 117] <- 'Rybarczyk, Ty'

#Give Alex vera everything above 117
#Check entire rows first
pitches[pitches$GameID == '20220519-LubranoPark-1' & pitches$PitcherTeam == 'ILL_ILL' & pitches$pitch_count > 117, ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
7266617266612552022-05-1919:35:24.0911Kirschsieper, Cole1000050785
7266627266622562022-05-1919:35:39.3712Kirschsieper, Cole1000050785
7266637266632572022-05-1919:36:01.1813Kirschsieper, Cole1000050785
7266647266642582022-05-1919:36:36.7821Kirschsieper, Cole1000050785
7266657266652592022-05-1919:36:48.8922Kirschsieper, Cole1000050785
7266667266662602022-05-1919:37:04.5123Kirschsieper, Cole1000050785
7266677266672612022-05-1919:37:18.3824Kirschsieper, Cole1000050785
7266687266682622022-05-1919:37:40.1725Kirschsieper, Cole1000050785
7266697266692632022-05-1919:37:54.8026Kirschsieper, Cole1000050785
7266707266702642022-05-1919:38:13.6627Kirschsieper, Cole1000050785
#Now Replace
pitches$PitcherId[pitches$GameID == '20220519-LubranoPark-1' & pitches$PitcherTeam == 'ILL_ILL' & pitches$pitch_count > 117] <- 1000050778
#Replace Name too Why not
pitches$Pitcher[pitches$GameID == '20220519-LubranoPark-1' & pitches$PitcherTeam == 'ILL_ILL' & pitches$pitch_count > 117] <- 'Vera, Alex'

20220519-SamfordUniversity-2. Cravey went 6.2IP 29BF, Goff, Alex went 2.1 9BF

# Look at all pitches for this game
pitches[pitches$GameID == '20220519-SamfordUniversity-2' & pitches$PitcherTeam == 'SAM_BUL', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
72874472874412022-05-1916:03:01.0111Cravey, Jacob1000101505
72874572874522022-05-1916:03:12.9712Cravey, Jacob1000101505
72874672874632022-05-1916:03:26.0513Cravey, Jacob1000101505
72874772874742022-05-1916:03:55.9421Cravey, Jacob1000101505
72874872874852022-05-1916:04:13.7622Cravey, Jacob1000101505
72874972874962022-05-1916:04:52.9231Cravey, Jacob1000101505
72875072875072022-05-1916:05:06.5132Cravey, Jacob1000101505
72875172875182022-05-1916:05:22.3333Cravey, Jacob1000101505
728752728752934Cravey, Jacob1000101505
7287867287864311Cravey, Jacob1000101505
#Alex Goff id
pitches[pitches$Pitcher == 'Goff, Alex', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
98184981842232022-03-0218:10:59.4151Goff, Alex1000057623
98185981852242022-03-0218:11:20.1552Goff, Alex1000057623
98186981862252022-03-0218:11:39.6753Goff, Alex1000057623
98187981872262022-03-0218:12:04.0154Goff, Alex1000057623
98188981882272022-03-0218:12:47.7255Goff, Alex1000057623
98189981892282022-03-0218:13:24.1856Goff, Alex1000057623
98190981902292022-03-0218:13:58.9361Goff, Alex1000057623
98191981912302022-03-0218:14:21.6162Goff, Alex1000057623
98192981922312022-03-0218:14:49.2863Goff, Alex1000057623
98193981932322022-03-0218:15:08.3664Goff, Alex1000057623
#1000057623

#Evrything 98 and up goes to Goff
#Check entire rows first
pitches[pitches$GameID == '20220519-SamfordUniversity-2' & pitches$PitcherTeam == 'SAM_BUL' & pitches$pitch_count > 97, ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
7289847289842412022-05-1918:23:36.6061Cravey, Jacob1000101505
7289857289852422022-05-1918:24:12.1462Cravey, Jacob1000101505
7289867289862432022-05-1918:24:31.9863Cravey, Jacob1000101505
7289877289872442022-05-1918:24:57.0164Cravey, Jacob1000101505
7289887289882452022-05-1918:25:22.8365Cravey, Jacob1000101505
7289977289972542022-05-1918:33:18.3711Cravey, Jacob1000101505
7289987289982552022-05-1918:33:33.6712Cravey, Jacob1000101505
7289997289992562022-05-1918:34:11.6421Cravey, Jacob1000101505
7290007290002572022-05-1918:34:24.3822Cravey, Jacob1000101505
7290017290012582022-05-1918:34:39.2823Cravey, Jacob1000101505
#Now Replace
pitches$PitcherId[pitches$GameID == '20220519-SamfordUniversity-2' & pitches$PitcherTeam == 'SAM_BUL' & pitches$pitch_count > 97] <- 1000057623
#Replace Name too Why not
pitches$Pitcher[pitches$GameID == '20220519-SamfordUniversity-2' & pitches$PitcherTeam == 'SAM_BUL' & pitches$pitch_count > 97] <- 'Goff, Alex'

Tomasic, Connor only went 6.1 (100 pitches), Kyle Bischoff did the last 2.2 (40 pitches)

# Look at all pitches for this game
pitches[pitches$GameID == '20220519-UNebraska-1' & pitches$PitcherTeam == 'MIC_SPA', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
731988731988172022-05-1918:43:48.6111Tomasic, Connor1000113049
731989731989182022-05-1918:44:04.9412Tomasic, Connor1000113049
731990731990192022-05-1918:44:19.2213Tomasic, Connor1000113049
731991731991202022-05-1918:44:33.5714Tomasic, Connor1000113049
731992731992212022-05-1918:45:01.9521Tomasic, Connor1000113049
731993731993222022-05-1918:45:16.9222Tomasic, Connor1000113049
731994731994232022-05-1918:45:37.4223Tomasic, Connor1000113049
731995731995242022-05-1918:45:55.7024Tomasic, Connor1000113049
731996731996252022-05-1918:46:18.9025Tomasic, Connor1000113049
731997731997262022-05-1918:47:03.1831Tomasic, Connor1000113049
#Kyle's id
pitches[pitches$Pitcher == 'Bischoff, Kyle', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
28820288202672022-02-2014:53:26.4211Bischoff, Kyle1000014400
28821288212682022-02-2014:53:40.3612Bischoff, Kyle1000014400
28822288222692022-02-2014:54:12.3813Bischoff, Kyle1000014400
28823288232702022-02-2014:54:52.8614Bischoff, Kyle1000014400
28824288242712022-02-2014:55:08.3415Bischoff, Kyle1000014400
28825288252722022-02-2014:55:26.2416Bischoff, Kyle1000014400
28826288262732022-02-2014:56:21.6921Bischoff, Kyle1000014400
28827288272742022-02-2014:58:03.0831Bischoff, Kyle1000014400
28828288282752022-02-2014:58:55.1241Bischoff, Kyle1000014400
28829288292762022-02-2014:59:13.6742Bischoff, Kyle1000014400
#1000014400

#Evrything 78 and up goes to Goff
#Check entire rows first
pitches[pitches$GameID == '20220519-UNebraska-1' & pitches$PitcherTeam == 'MIC_SPA' & pitches$pitch_count > 100, ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
7321827321822112022-05-1920:29:44.3342Tomasic, Connor1000113049
7321977321972262022-05-1920:40:48.5911Tomasic, Connor1000113049
7321987321982272022-05-1920:41:02.9612Tomasic, Connor1000113049
7321997321992282022-05-1920:41:17.5613Tomasic, Connor1000113049
7322007322002292022-05-1920:41:36.4014Tomasic, Connor1000113049
7322017322012302022-05-1920:41:56.3615Tomasic, Connor1000113049
7322027322022312022-05-1920:42:42.4521Tomasic, Connor1000113049
7322037322032322022-05-1920:43:02.8422Tomasic, Connor1000113049
7322047322042332022-05-1920:43:46.1731Tomasic, Connor1000113049
7322057322052342022-05-1920:44:10.9132Tomasic, Connor1000113049
#Now Replace
pitches$PitcherId[pitches$GameID == '20220519-UNebraska-1' & pitches$PitcherTeam == 'MIC_SPA' & pitches$pitch_count > 100] <- 1000014400
#Replace Name too Why not
pitches$Pitcher[pitches$GameID == '20220519-UNebraska-1' & pitches$PitcherTeam == 'MIC_SPA' & pitches$pitch_count > 100] <- 'Bischoff, Kyle'

LAST ONE: Nick Dean only went five inning (87 pitches) Nigel Belgrave did 6 and 7. TOgether they threw 131 so theres ten missing or possibly correctly assigned to Belgrave 20220520-PurdueUniversity-1 https://umterps.com/sports/baseball/stats/2022/purdue/boxscore/12805

# Look at all pitches for this game
pitches[pitches$GameID == '20220520-PurdueUniversity-1' & pitches$PitcherTeam == 'MAR_TER', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
74279274279282022-05-2017:07:18.7911Dean, Nick1000079033
74279374279392022-05-2017:07:30.5112Dean, Nick1000079033
742794742794102022-05-2017:07:42.3513Dean, Nick1000079033
742795742795112022-05-2017:07:54.6614Dean, Nick1000079033
742796742796122022-05-2017:08:29.4921Dean, Nick1000079033
742797742797132022-05-2017:08:46.8222Dean, Nick1000079033
742798742798142022-05-2017:08:59.3523Dean, Nick1000079033
742799742799152022-05-2017:10:11.2631Dean, Nick1000079033
742800742800162022-05-2017:10:47.0732Dean, Nick1000079033
742801742801172022-05-2017:11:19.3533Dean, Nick1000079033
#Nigel's id
pitches[pitches$Pitcher == 'Belgrave, Nigel', ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
13064130643102022-02-1918:10:03.3111Belgrave, Nigel1000100709
13065130653112022-02-1918:10:41.8021Belgrave, Nigel1000100709
13066130663122022-02-1918:10:59.2222Belgrave, Nigel1000100709
13067130673132022-02-1918:11:21.2623Belgrave, Nigel1000100709
13068130683142022-02-1918:11:39.1024Belgrave, Nigel1000100709
13069130693152022-02-1918:12:37.3531Belgrave, Nigel1000100709
13070130703162022-02-1918:12:55.8932Belgrave, Nigel1000100709
13071130713172022-02-1918:13:15.2033Belgrave, Nigel1000100709
13072130723182022-02-1918:13:33.3134Belgrave, Nigel1000100709
13073130733192022-02-1918:14:31.1041Belgrave, Nigel1000100709
#1000100709


#Evrything 89 to 142 goes to NIgel
#Check entire rows first
pitches[pitches$GameID == '20220520-PurdueUniversity-1' & pitches$PitcherTeam == 'MAR_TER' & pitches$pitch_count > 88 & pitches$pitch_count < 143, ]
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
7429717429711872022-05-2019:01:16.1511Dean, Nick1000079033
7429727429721882022-05-2019:01:31.1012Dean, Nick1000079033
7429737429731892022-05-2019:01:45.1213Dean, Nick1000079033
7429747429741902022-05-2019:02:16.2121Dean, Nick1000079033
7429757429751912022-05-2019:02:32.0022Dean, Nick1000079033
7429767429761922022-05-2019:03:11.7331Dean, Nick1000079033
7429777429771932022-05-2019:03:32.7432Dean, Nick1000079033
7429787429781942022-05-2019:04:09.0533Dean, Nick1000079033
7429797429791952022-05-2019:04:38.8634Dean, Nick1000079033
7429807429801962022-05-2019:05:03.6235Dean, Nick1000079033
#Now Replace
pitches$PitcherId[pitches$GameID == '20220520-PurdueUniversity-1' & pitches$PitcherTeam == 'MAR_TER' & pitches$pitch_count > 88 & pitches$pitch_count < 143] <- 1000100709
#Replace Name too Why not
pitches$Pitcher[pitches$GameID == '20220520-PurdueUniversity-1' & pitches$PitcherTeam == 'MAR_TER' & pitches$pitch_count > 88 & pitches$pitch_count < 143] <- 'Belgrave, Nigel'

Unfortunatley there are probably other instances where the trackman operator didnt record a pitching change and some pitches early in a reliever’s outing look like they came late in the starters. We caught the worst cases where it put pitchers above 120 pitches but there could be more. I don’t think it’s feasible to look through 1.2 million pitches and double check them all.

I’m going to runthe chunks that assign pitch counts again now that the pitcher IDs are accurate

Add a column that specifies how many pitches a pitcher has thrown

pitches$pitch_count <- with(pitches, ave(seq_along(paste(GameID, PitcherId)), paste(GameID, PitcherId), FUN = seq_along)) - 1

# Add a new factor column to the dataframe for the pitch group
pitches$pitch_group <- as.factor(ifelse(pitches$pitch_count < 100, (pitches$pitch_count) %/% 10 + 1, 11))

# Check the updated dataframe
head(pitches, 250)
ABCDEFGHIJ0123456789
 
 
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
1112022-02-1813:32:19.8611Kniskern, Trevor1000054486
2222022-02-1813:32:36.0012Kniskern, Trevor1000054486
3332022-02-1813:33:12.4513Kniskern, Trevor1000054486
4442022-02-1813:33:53.1721Kniskern, Trevor1000054486
5552022-02-1813:34:10.2822Kniskern, Trevor1000054486
6662022-02-1813:34:29.8023Kniskern, Trevor1000054486
7772022-02-1813:34:50.3624Kniskern, Trevor1000054486
8882022-02-1813:35:24.4225Kniskern, Trevor1000054486
9992022-02-1813:36:11.6931Kniskern, Trevor1000054486
1010102022-02-1813:36:36.9032Kniskern, Trevor1000054486

I want better names for the pitch_group levels

pitches$pitch_bin <- pitches$pitch_group

pitches$pitch_group <- NA

pitches$pitch_group[pitches$pitch_bin == '1'] <- '0-9 Pitches'
pitches$pitch_group[pitches$pitch_bin == '2'] <- '10-19 Pitches'
pitches$pitch_group[pitches$pitch_bin == '3'] <- '20-29 Pitches'
pitches$pitch_group[pitches$pitch_bin == '4'] <- '30-39 Pitches'
pitches$pitch_group[pitches$pitch_bin == '5'] <- '40-49 Pitches'
pitches$pitch_group[pitches$pitch_bin == '6'] <- '50-59 Pitches'
pitches$pitch_group[pitches$pitch_bin == '7'] <- '60-69 Pitches'
pitches$pitch_group[pitches$pitch_bin == '8'] <- '70-79 Pitches'
pitches$pitch_group[pitches$pitch_bin == '9'] <- '80-89 Pitches'
pitches$pitch_group[pitches$pitch_bin == '10'] <- '90-99 Pitches'
pitches$pitch_group[pitches$pitch_bin == '11'] <- 'More Than 100 Pitches'

#MAke sure the order is correct. Really annoying if regression output isn't in ascending order
sqldf("SELECT pitch_group, count(*) from pitches GROUP BY pitch_group ORDER BY pitch_group")
ABCDEFGHIJ0123456789
pitch_group
<chr>
count(*)
<int>
0-9 Pitches297887
10-19 Pitches240230
20-29 Pitches171000
30-39 Pitches122061
40-49 Pitches90347
50-59 Pitches69969
60-69 Pitches55980
70-79 Pitches42674
80-89 Pitches29738
90-99 Pitches16662
write.csv(pitches, 'C:\\Users\\Nick\\UCSB Baseball\\All_College_TM_19_22_Mistakes_Fixed_Pitchcounts_added.csv')
pitches <- read.csv('C:\\Users\\Nick\\UCSB Baseball\\All_College_TM_19_22_Mistakes_Fixed_Pitchcounts_added.csv')

THeres a chunk where i created a result column in the other markdown above the chunk below. Not sure if I did it again later on so I’m not including that for now

Horizontal break is meaningless if theres no distinction made between lefties and righties

pitches$Hbrk <- NA

pitches$Hbrk[pitches$PitcherThrows == 'Right'] <- -pitches$HorzBreak
## Warning in pitches$Hbrk[pitches$PitcherThrows == "Right"] <- -pitches$HorzBreak:
## number of items to replace is not a multiple of replacement length
pitches$Hbrk[pitches$PitcherThrows == 'Left'] <- pitches$HorzBreak
## Warning in pitches$Hbrk[pitches$PitcherThrows == "Left"] <- pitches$HorzBreak:
## number of items to replace is not a multiple of replacement length
pitches %>% group_by(PitcherThrows) %>% summarise(n =n())
ABCDEFGHIJ0123456789
PitcherThrows
<chr>
n
<int>
Left315035
Right829400
RIght17
Undefined3

##Year Averages used for differences

##Find averages for the dependent measures; Velovity, Spin Rate, Break,     Still try to do command, result based metric


pitcher_year_group <- sqldf("select PitcherId, TaggedPitchType, SUBSTRING(Date, 1, 4) as YEAR, MAX(Pitcher) as PitcherName, MAX(PitcherTeam) as Team, AVG(RelSpeed) AS avg_RelSpeedYear, AVG(SpinRate) AS avg_spinrateYear, AVG(InducedVertBReak) AS avg_IndVertBrkYear, AVG(Hbrk) As avg_HBrkYear, COUNT(*) as PitchesThrown FROM pitches  WHERE PitcherId is not null GROUP BY PitcherId, SUBSTRING(Date, 1, 4), TaggedPitchType ORDER BY COUNT(*) DESC")

Relevel Pitch_group. MAke 0-9 the baseline

pitches$pitch_group <- relevel(factor(pitches$pitch_group), ref = '0-9 Pitches')

Next we have to merge it so that summary stats for each pitch type of each pitcher are included for in each row, later we’ll also calculate differences between each individual pitch numbers and the averages

pitches$YEAR = NA
pitches$YEAR <- substr(pitches$Date, 0, 4)
pitcherYearAvgs <- left_join(pitches, pitcher_year_group, on = c('PitcherId', 'YEAR'))
## Joining, by = c("PitcherId", "TaggedPitchType", "YEAR")
head(pitcherYearAvgs, 50)
ABCDEFGHIJ0123456789
 
 
X.1
<int>
X
<int>
PitchNo
<int>
Date
<chr>
Time
<chr>
PAofInning
<int>
PitchofPA
<int>
Pitcher
<chr>
PitcherId
<dbl>
11112022-02-1813:32:19.8611Kniskern, Trevor1000054486
22222022-02-1813:32:36.0012Kniskern, Trevor1000054486
33332022-02-1813:33:12.4513Kniskern, Trevor1000054486
44442022-02-1813:33:53.1721Kniskern, Trevor1000054486
55552022-02-1813:34:10.2822Kniskern, Trevor1000054486
66662022-02-1813:34:29.8023Kniskern, Trevor1000054486
77772022-02-1813:34:50.3624Kniskern, Trevor1000054486
88882022-02-1813:35:24.4225Kniskern, Trevor1000054486
99992022-02-1813:36:11.6931Kniskern, Trevor1000054486
101010102022-02-1813:36:36.9032Kniskern, Trevor1000054486
pitcherYearAvgs$RelSpeedDiffYear <- pitcherYearAvgs$RelSpeed - pitcherYearAvgs$avg_RelSpeedYear
pitcherYearAvgs$VertBreakDiffYear <- pitcherYearAvgs$InducedVertBreak - pitcherYearAvgs$avg_IndVertBrkYear
pitcherYearAvgs$HorzBreakDiffYear <- pitcherYearAvgs$Hbrk - pitcherYearAvgs$avg_HBrkYear
pitcherYearAvgs$SpinRateDiffYear <- pitcherYearAvgs$SpinRate - pitcherYearAvgs$avg_spinrateYear

Year avgs would work, and might be better, for WHIP regressions and other results based tests.

HAve to make individual dfs for each pitch type

fastballsYear <- pitcherYearAvgs[pitcherYearAvgs$TaggedPitchType == 'Fastball', ]
changeupsYear <- pitcherYearAvgs[pitcherYearAvgs$TaggedPitchType == 'ChangeUp', ]
curveballsYear <- pitcherYearAvgs[pitcherYearAvgs$TaggedPitchType == 'Curveball', ]
cuttersYear <- pitcherYearAvgs[pitcherYearAvgs$TaggedPitchType == 'Cutter', ]
slidersYear <- pitcherYearAvgs[pitcherYearAvgs$TaggedPitchType == 'Slider', ]
sinkersYear <- pitcherYearAvgs[pitcherYearAvgs$TaggedPitchType == 'Sinker', ]
splittersYear <- pitcherYearAvgs[pitcherYearAvgs$TaggedPitchType == 'Splitter', ]

##Regressions

###Continuous

Fastballs

fastballSpinRateYear <- lm(SpinRateDiffYear ~ pitch_count, data = fastballsYear)
summary(fastballSpinRateYear)
## 
## Call:
## lm(formula = SpinRateDiffYear ~ pitch_count, data = fastballsYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1442.50   -56.15     4.81    62.88  1540.86 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.241863   0.192084   27.29   <2e-16 ***
## pitch_count -0.191874   0.005283  -36.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 100.9 on 634095 degrees of freedom
##   (3401 observations deleted due to missingness)
## Multiple R-squared:  0.002076,   Adjusted R-squared:  0.002074 
## F-statistic:  1319 on 1 and 634095 DF,  p-value: < 2.2e-16
fastballRelSpeedYear <- lm(RelSpeedDiffYear ~ pitch_count, data = fastballsYear)
summary(fastballRelSpeedYear)
## 
## Call:
## lm(formula = RelSpeedDiffYear ~ pitch_count, data = fastballsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.933  -0.768   0.072   0.881  14.643 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.3544947  0.0027376   129.5   <2e-16 ***
## pitch_count -0.0129755  0.0000753  -172.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.439 on 635019 degrees of freedom
##   (2477 observations deleted due to missingness)
## Multiple R-squared:  0.04468,    Adjusted R-squared:  0.04467 
## F-statistic: 2.97e+04 on 1 and 635019 DF,  p-value: < 2.2e-16
fastballVertBreakYear <- lm(VertBreakDiffYear ~ pitch_count, data = fastballsYear)
summary(fastballVertBreakYear)
## 
## Call:
## lm(formula = VertBreakDiffYear ~ pitch_count, data = fastballsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.517  -1.766   0.073   1.869  33.974 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0438247  0.0058778   7.456 8.93e-14 ***
## pitch_count -0.0016038  0.0001616  -9.922  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.085 on 632796 degrees of freedom
##   (4700 observations deleted due to missingness)
## Multiple R-squared:  0.0001555,  Adjusted R-squared:  0.000154 
## F-statistic: 98.44 on 1 and 632796 DF,  p-value: < 2.2e-16
fastballHorzBreakYear <- lm(HorzBreakDiffYear ~ pitch_count, data = fastballsYear)
summary(fastballHorzBreakYear)
## 
## Call:
## lm(formula = HorzBreakDiffYear ~ pitch_count, data = fastballsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -44.538  -9.443  -0.854   9.534  37.277 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0256665  0.0214953  -1.194    0.232
## pitch_count  0.0009401  0.0005915   1.589    0.112
## 
## Residual standard error: 11.25 on 629345 degrees of freedom
##   (8151 observations deleted due to missingness)
## Multiple R-squared:  4.014e-06,  Adjusted R-squared:  2.425e-06 
## F-statistic: 2.526 on 1 and 629345 DF,  p-value: 0.112

Change Up

changeupSpinRateYear <- lm(SpinRateDiffYear ~ pitch_count, data = changeupsYear)
summary(changeupSpinRateYear)
## 
## Call:
## lm(formula = SpinRateDiffYear ~ pitch_count, data = changeupsYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1150.81   -83.80    -6.27    70.90  2408.49 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  11.6678     0.7466   15.63   <2e-16 ***
## pitch_count  -0.3628     0.0183  -19.82   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 161.2 on 123178 degrees of freedom
##   (619 observations deleted due to missingness)
## Multiple R-squared:  0.003181,   Adjusted R-squared:  0.003172 
## F-statistic:   393 on 1 and 123178 DF,  p-value: < 2.2e-16
changeupRelSpeedYear <- lm(RelSpeedDiffYear ~ pitch_count, data = changeupsYear)
summary(changeupRelSpeedYear)
## 
## Call:
## lm(formula = RelSpeedDiffYear ~ pitch_count, data = changeupsYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.6986  -0.8695   0.0089   0.8904  12.9319 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.2661535  0.0072789   36.56   <2e-16 ***
## pitch_count -0.0082802  0.0001785  -46.39   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.575 on 123591 degrees of freedom
##   (206 observations deleted due to missingness)
## Multiple R-squared:  0.01712,    Adjusted R-squared:  0.01711 
## F-statistic:  2152 on 1 and 123591 DF,  p-value: < 2.2e-16
changeupVertBreakYear <- lm(VertBreakDiffYear ~ pitch_count, data = changeupsYear)
summary(changeupVertBreakYear)
## 
## Call:
## lm(formula = VertBreakDiffYear ~ pitch_count, data = changeupsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.704  -2.196   0.033   2.283  21.938 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0979812  0.0179679  -5.453 4.96e-08 ***
## pitch_count  0.0030479  0.0004406   6.918 4.59e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.881 on 123219 degrees of freedom
##   (578 observations deleted due to missingness)
## Multiple R-squared:  0.0003883,  Adjusted R-squared:  0.0003802 
## F-statistic: 47.86 on 1 and 123219 DF,  p-value: 4.591e-12
changeupHorzBreakYear <- lm(HorzBreakDiffYear ~ pitch_count, data = changeupsYear)
summary(changeupHorzBreakYear)
## 
## Call:
## lm(formula = HorzBreakDiffYear ~ pitch_count, data = changeupsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.727  -9.009  -0.349   9.099  33.723 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.035870   0.051072  -0.702    0.482
## pitch_count  0.001116   0.001252   0.891    0.373
## 
## Residual standard error: 10.99 on 122247 degrees of freedom
##   (1550 observations deleted due to missingness)
## Multiple R-squared:  6.496e-06,  Adjusted R-squared:  -1.684e-06 
## F-statistic: 0.7942 on 1 and 122247 DF,  p-value: 0.3728

Curveballs

curveballSpinRateYear <- lm(SpinRateDiffYear ~ pitch_count, data = curveballsYear)
summary(curveballSpinRateYear)
## 
## Call:
## lm(formula = SpinRateDiffYear ~ pitch_count, data = curveballsYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1560.49   -58.13     6.70    70.89  1955.26 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.59784    0.71775   7.799  6.3e-15 ***
## pitch_count -0.18349    0.01817 -10.097  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 134.9 on 87522 degrees of freedom
##   (1013 observations deleted due to missingness)
## Multiple R-squared:  0.001164,   Adjusted R-squared:  0.001152 
## F-statistic:   102 on 1 and 87522 DF,  p-value: < 2.2e-16
curveballRelSpeedYear <- lm(RelSpeedDiffYear ~ pitch_count, data = curveballsYear)
summary(curveballRelSpeedYear)
## 
## Call:
## lm(formula = RelSpeedDiffYear ~ pitch_count, data = curveballsYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.1420  -0.9747  -0.0423   0.9135  19.9656 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1894395  0.0087500   21.65   <2e-16 ***
## pitch_count -0.0062093  0.0002215  -28.03   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.651 on 88291 degrees of freedom
##   (244 observations deleted due to missingness)
## Multiple R-squared:  0.008819,   Adjusted R-squared:  0.008808 
## F-statistic: 785.5 on 1 and 88291 DF,  p-value: < 2.2e-16
curveballVertBreakYear <- lm(VertBreakDiffYear ~ pitch_count, data = curveballsYear)
summary(curveballVertBreakYear)
## 
## Call:
## lm(formula = VertBreakDiffYear ~ pitch_count, data = curveballsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.273  -2.232  -0.176   1.876  39.805 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0644487  0.0205406  -3.138   0.0017 ** 
## pitch_count  0.0021101  0.0005196   4.061 4.89e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.866 on 87887 degrees of freedom
##   (648 observations deleted due to missingness)
## Multiple R-squared:  0.0001876,  Adjusted R-squared:  0.0001762 
## F-statistic: 16.49 on 1 and 87887 DF,  p-value: 4.892e-05
curveballHorzBreakYear <- lm(HorzBreakDiffYear ~ pitch_count, data = curveballsYear)
summary(curveballHorzBreakYear)
## 
## Call:
## lm(formula = HorzBreakDiffYear ~ pitch_count, data = curveballsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.637  -8.962  -0.412   8.981  34.137 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0052652  0.0582718  -0.090    0.928
## pitch_count  0.0001725  0.0014749   0.117    0.907
## 
## Residual standard error: 10.94 on 87453 degrees of freedom
##   (1082 observations deleted due to missingness)
## Multiple R-squared:  1.564e-07,  Adjusted R-squared:  -1.128e-05 
## F-statistic: 0.01368 on 1 and 87453 DF,  p-value: 0.9069

CUtters

cutterSpinRateYear <- lm(SpinRateDiffYear ~ pitch_count, data = cuttersYear)
summary(cutterSpinRateYear)
## 
## Call:
## lm(formula = SpinRateDiffYear ~ pitch_count, data = cuttersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1278.40   -53.47     3.33    61.35  1368.95 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.14612    1.55200   5.249 1.55e-07 ***
## pitch_count -0.27648    0.04013  -6.890 5.85e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 115.9 on 13288 degrees of freedom
##   (110 observations deleted due to missingness)
## Multiple R-squared:  0.003559,   Adjusted R-squared:  0.003484 
## F-statistic: 47.47 on 1 and 13288 DF,  p-value: 5.848e-12
cutterlRelSpeedYear <- lm(RelSpeedDiffYear ~ pitch_count, data = cuttersYear)
summary(cutterlRelSpeedYear)
## 
## Call:
## lm(formula = RelSpeedDiffYear ~ pitch_count, data = cuttersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.2257  -0.8456   0.0077   0.8685  11.4163 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.2166739  0.0202787   10.69   <2e-16 ***
## pitch_count -0.0073515  0.0005242  -14.03   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.519 on 13371 degrees of freedom
##   (27 observations deleted due to missingness)
## Multiple R-squared:  0.0145, Adjusted R-squared:  0.01442 
## F-statistic: 196.7 on 1 and 13371 DF,  p-value: < 2.2e-16
cutterVertBreakYear <- lm(VertBreakDiffYear ~ pitch_count, data = cuttersYear)
summary(cutterVertBreakYear)
## 
## Call:
## lm(formula = VertBreakDiffYear ~ pitch_count, data = cuttersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.7201  -1.8558  -0.0654   1.7084  22.0430 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.083845   0.042984  -1.951   0.0511 .
## pitch_count  0.002844   0.001111   2.560   0.0105 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.217 on 13352 degrees of freedom
##   (46 observations deleted due to missingness)
## Multiple R-squared:  0.0004906,  Adjusted R-squared:  0.0004158 
## F-statistic: 6.554 on 1 and 13352 DF,  p-value: 0.01047
cutterHorzBreakYear <- lm(HorzBreakDiffYear ~ pitch_count, data = cuttersYear)
summary(cutterHorzBreakYear)
## 
## Call:
## lm(formula = HorzBreakDiffYear ~ pitch_count, data = cuttersYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.495  -8.992  -1.154   9.133  33.669 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.250328   0.147726   1.695   0.0902 .
## pitch_count -0.008522   0.003829  -2.225   0.0261 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.02 on 13242 degrees of freedom
##   (156 observations deleted due to missingness)
## Multiple R-squared:  0.0003738,  Adjusted R-squared:  0.0002983 
## F-statistic: 4.952 on 1 and 13242 DF,  p-value: 0.02608

Sliders

sliderSpinRateYear <- lm(SpinRateDiffYear ~ pitch_count, data = slidersYear)
summary(sliderSpinRateYear)
## 
## Call:
## lm(formula = SpinRateDiffYear ~ pitch_count, data = slidersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1916.90   -58.25    10.43    77.45  2294.62 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.30553    0.51832   12.16   <2e-16 ***
## pitch_count -0.21943    0.01377  -15.94   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 153.2 on 209264 degrees of freedom
##   (4771 observations deleted due to missingness)
## Multiple R-squared:  0.001212,   Adjusted R-squared:  0.001208 
## F-statistic:   254 on 1 and 209264 DF,  p-value: < 2.2e-16
sliderlRelSpeedYear <- lm(RelSpeedDiffYear ~ pitch_count, data = slidersYear)
summary(sliderlRelSpeedYear)
## 
## Call:
## lm(formula = RelSpeedDiffYear ~ pitch_count, data = slidersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.5727  -0.9945  -0.0015   0.9936  16.4665 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.2058581  0.0058695   35.07   <2e-16 ***
## pitch_count -0.0071664  0.0001559  -45.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.753 on 213530 degrees of freedom
##   (505 observations deleted due to missingness)
## Multiple R-squared:  0.009794,   Adjusted R-squared:  0.009789 
## F-statistic:  2112 on 1 and 213530 DF,  p-value: < 2.2e-16
sliderVertBreakYear <- lm(VertBreakDiffYear ~ pitch_count, data = slidersYear)
summary(sliderVertBreakYear)
## 
## Call:
## lm(formula = VertBreakDiffYear ~ pitch_count, data = slidersYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.222  -2.179  -0.039   2.082  56.100 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.0178076  0.0129526  -1.375   0.1692  
## pitch_count  0.0006200  0.0003442   1.801   0.0716 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.86 on 212707 degrees of freedom
##   (1328 observations deleted due to missingness)
## Multiple R-squared:  1.526e-05,  Adjusted R-squared:  1.056e-05 
## F-statistic: 3.245 on 1 and 212707 DF,  p-value: 0.07163
sliderHorzBreakYear <- lm(HorzBreakDiffYear ~ pitch_count, data = slidersYear)
summary(sliderHorzBreakYear)
## 
## Call:
## lm(formula = HorzBreakDiffYear ~ pitch_count, data = slidersYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.524  -9.244  -0.921   9.311  38.292 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.027955   0.037466  -0.746    0.456
## pitch_count  0.000975   0.000997   0.978    0.328
## 
## Residual standard error: 11.13 on 211266 degrees of freedom
##   (2769 observations deleted due to missingness)
## Multiple R-squared:  4.527e-06,  Adjusted R-squared:  -2.068e-07 
## F-statistic: 0.9563 on 1 and 211266 DF,  p-value: 0.3281

Sinkers

sliderSpinRateYear <- lm(SpinRateDiffYear ~ pitch_count, data = slidersYear)
summary(sliderSpinRateYear)
## 
## Call:
## lm(formula = SpinRateDiffYear ~ pitch_count, data = slidersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1916.90   -58.25    10.43    77.45  2294.62 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.30553    0.51832   12.16   <2e-16 ***
## pitch_count -0.21943    0.01377  -15.94   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 153.2 on 209264 degrees of freedom
##   (4771 observations deleted due to missingness)
## Multiple R-squared:  0.001212,   Adjusted R-squared:  0.001208 
## F-statistic:   254 on 1 and 209264 DF,  p-value: < 2.2e-16
sliderlRelSpeedYear <- lm(RelSpeedDiffYear ~ pitch_count, data = slidersYear)
summary(sliderlRelSpeedYear)
## 
## Call:
## lm(formula = RelSpeedDiffYear ~ pitch_count, data = slidersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.5727  -0.9945  -0.0015   0.9936  16.4665 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.2058581  0.0058695   35.07   <2e-16 ***
## pitch_count -0.0071664  0.0001559  -45.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.753 on 213530 degrees of freedom
##   (505 observations deleted due to missingness)
## Multiple R-squared:  0.009794,   Adjusted R-squared:  0.009789 
## F-statistic:  2112 on 1 and 213530 DF,  p-value: < 2.2e-16
sliderVertBreakYear <- lm(VertBreakDiffYear ~ pitch_count, data = slidersYear)
summary(sliderVertBreakYear)
## 
## Call:
## lm(formula = VertBreakDiffYear ~ pitch_count, data = slidersYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.222  -2.179  -0.039   2.082  56.100 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.0178076  0.0129526  -1.375   0.1692  
## pitch_count  0.0006200  0.0003442   1.801   0.0716 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.86 on 212707 degrees of freedom
##   (1328 observations deleted due to missingness)
## Multiple R-squared:  1.526e-05,  Adjusted R-squared:  1.056e-05 
## F-statistic: 3.245 on 1 and 212707 DF,  p-value: 0.07163
sliderHorzBreakYear <- lm(HorzBreakDiffYear ~ pitch_count, data = slidersYear)
summary(sliderHorzBreakYear)
## 
## Call:
## lm(formula = HorzBreakDiffYear ~ pitch_count, data = slidersYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.524  -9.244  -0.921   9.311  38.292 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.027955   0.037466  -0.746    0.456
## pitch_count  0.000975   0.000997   0.978    0.328
## 
## Residual standard error: 11.13 on 211266 degrees of freedom
##   (2769 observations deleted due to missingness)
## Multiple R-squared:  4.527e-06,  Adjusted R-squared:  -2.068e-07 
## F-statistic: 0.9563 on 1 and 211266 DF,  p-value: 0.3281

##Factor Level (Pitch_groups)

Fastballs

fastballSpinRateBinYear <- lm(SpinRateDiffYear ~ pitch_group, data = fastballsYear)
summary(fastballSpinRateBinYear)
## 
## Call:
## lm(formula = SpinRateDiffYear ~ pitch_group, data = fastballsYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1445.55   -56.12     4.81    62.86  1542.62 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        2.7206     0.2404  11.317  < 2e-16 ***
## pitch_group10-19 Pitches           1.9217     0.3666   5.242 1.59e-07 ***
## pitch_group20-29 Pitches          -0.3470     0.4054  -0.856    0.392    
## pitch_group30-39 Pitches          -3.9807     0.4592  -8.669  < 2e-16 ***
## pitch_group40-49 Pitches          -6.1259     0.5215 -11.747  < 2e-16 ***
## pitch_group50-59 Pitches          -8.0382     0.5804 -13.850  < 2e-16 ***
## pitch_group60-69 Pitches         -12.3295     0.6343 -19.438  < 2e-16 ***
## pitch_group70-79 Pitches         -15.3186     0.7194 -21.295  < 2e-16 ***
## pitch_group80-89 Pitches         -15.7449     0.8620 -18.266  < 2e-16 ***
## pitch_group90-99 Pitches         -13.2271     1.1265 -11.742  < 2e-16 ***
## pitch_groupMore Than 100 Pitches -12.4073     1.6155  -7.680 1.59e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 100.9 on 634086 degrees of freedom
##   (3401 observations deleted due to missingness)
## Multiple R-squared:  0.002544,   Adjusted R-squared:  0.002528 
## F-statistic: 161.7 on 10 and 634086 DF,  p-value: < 2.2e-16
#fastballSR <- as.data.frame(fastballSpinRateBinYear$coefficients)

fastballRelSpeedBinYear <- lm(RelSpeedDiffYear ~ pitch_group, data = fastballsYear)
summary(fastballRelSpeedBinYear)
## 
## Call:
## lm(formula = RelSpeedDiffYear ~ pitch_group, data = fastballsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.843  -0.766   0.074   0.880  14.762 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       0.361035   0.003423  105.46   <2e-16 ***
## pitch_group10-19 Pitches         -0.193071   0.005220  -36.98   <2e-16 ***
## pitch_group20-29 Pitches         -0.382668   0.005773  -66.28   <2e-16 ***
## pitch_group30-39 Pitches         -0.527655   0.006540  -80.69   <2e-16 ***
## pitch_group40-49 Pitches         -0.641584   0.007426  -86.40   <2e-16 ***
## pitch_group50-59 Pitches         -0.732228   0.008264  -88.60   <2e-16 ***
## pitch_group60-69 Pitches         -0.884686   0.009033  -97.94   <2e-16 ***
## pitch_group70-79 Pitches         -0.955556   0.010243  -93.29   <2e-16 ***
## pitch_group80-89 Pitches         -0.989537   0.012275  -80.62   <2e-16 ***
## pitch_group90-99 Pitches         -1.021188   0.016038  -63.67   <2e-16 ***
## pitch_groupMore Than 100 Pitches -0.953133   0.022994  -41.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.438 on 635010 degrees of freedom
##   (2477 observations deleted due to missingness)
## Multiple R-squared:  0.04671,    Adjusted R-squared:  0.04669 
## F-statistic:  3111 on 10 and 635010 DF,  p-value: < 2.2e-16
#as.data.frame(fastballRelSpeedBinYear$coefficients)

fastballVertBreakBinYear <- lm(VertBreakDiffYear ~ pitch_group, data = fastballsYear)
summary(fastballVertBreakBinYear)
## 
## Call:
## lm(formula = VertBreakDiffYear ~ pitch_group, data = fastballsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.507  -1.766   0.073   1.870  33.978 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       0.030503   0.007358   4.146 3.39e-05 ***
## pitch_group10-19 Pitches         -0.012767   0.011221  -1.138 0.255216    
## pitch_group20-29 Pitches         -0.008705   0.012408  -0.702 0.482983    
## pitch_group30-39 Pitches         -0.052571   0.014054  -3.741 0.000184 ***
## pitch_group40-49 Pitches         -0.054728   0.015958  -3.429 0.000605 ***
## pitch_group50-59 Pitches         -0.047091   0.017759  -2.652 0.008009 ** 
## pitch_group60-69 Pitches         -0.070036   0.019409  -3.608 0.000308 ***
## pitch_group70-79 Pitches         -0.114292   0.022010  -5.193 2.07e-07 ***
## pitch_group80-89 Pitches         -0.118241   0.026363  -4.485 7.29e-06 ***
## pitch_group90-99 Pitches         -0.230369   0.034469  -6.683 2.34e-11 ***
## pitch_groupMore Than 100 Pitches -0.174715   0.049467  -3.532 0.000413 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.085 on 632787 degrees of freedom
##   (4700 observations deleted due to missingness)
## Multiple R-squared:  0.0001751,  Adjusted R-squared:  0.0001593 
## F-statistic: 11.08 on 10 and 632787 DF,  p-value: < 2.2e-16
#fastballV <- as.data.frame(fastballVertBreakBinYear$coefficients)

fastballHorzBreakBinYear <- lm(HorzBreakDiffYear ~ pitch_group, data = fastballsYear)
summary(fastballHorzBreakBinYear)
## 
## Call:
## lm(formula = HorzBreakDiffYear ~ pitch_group, data = fastballsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -44.509  -9.444  -0.856   9.532  37.276 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      -0.04607    0.02690  -1.713   0.0867 .
## pitch_group10-19 Pitches          0.03736    0.04104   0.910   0.3626  
## pitch_group20-29 Pitches          0.08406    0.04539   1.852   0.0640 .
## pitch_group30-39 Pitches          0.05912    0.05142   1.150   0.2502  
## pitch_group40-49 Pitches          0.06343    0.05837   1.087   0.2772  
## pitch_group50-59 Pitches          0.13665    0.06498   2.103   0.0355 *
## pitch_group60-69 Pitches          0.14970    0.07103   2.107   0.0351 *
## pitch_group70-79 Pitches         -0.08579    0.08058  -1.065   0.2870  
## pitch_group80-89 Pitches          0.06636    0.09653   0.687   0.4918  
## pitch_group90-99 Pitches          0.04705    0.12637   0.372   0.7096  
## pitch_groupMore Than 100 Pitches  0.09878    0.18080   0.546   0.5848  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.25 on 629336 degrees of freedom
##   (8151 observations deleted due to missingness)
## Multiple R-squared:  1.95e-05,   Adjusted R-squared:  3.609e-06 
## F-statistic: 1.227 on 10 and 629336 DF,  p-value: 0.2673
#fastballH <- as.data.frame(fastballHorzBreakBinYear$coefficients)
changeupSpinRateBinYear <- lm(SpinRateDiffYear ~ pitch_group, data = changeupsYear)
summary(changeupSpinRateBinYear)
## 
## Call:
## lm(formula = SpinRateDiffYear ~ pitch_group, data = changeupsYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1151.00   -83.82    -6.54    70.98  2409.04 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         8.962      1.011   8.867  < 2e-16 ***
## pitch_group10-19 Pitches           -1.804      1.442  -1.251  0.21078    
## pitch_group20-29 Pitches           -5.467      1.578  -3.465  0.00053 ***
## pitch_group30-39 Pitches           -8.598      1.694  -5.075 3.89e-07 ***
## pitch_group40-49 Pitches          -14.517      1.791  -8.105 5.31e-16 ***
## pitch_group50-59 Pitches          -17.987      1.961  -9.173  < 2e-16 ***
## pitch_group60-69 Pitches          -18.682      2.143  -8.719  < 2e-16 ***
## pitch_group70-79 Pitches          -27.402      2.354 -11.640  < 2e-16 ***
## pitch_group80-89 Pitches          -27.361      2.707 -10.107  < 2e-16 ***
## pitch_group90-99 Pitches          -29.918      3.585  -8.346  < 2e-16 ***
## pitch_groupMore Than 100 Pitches  -33.496      5.335  -6.278 3.44e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 161.2 on 123169 degrees of freedom
##   (619 observations deleted due to missingness)
## Multiple R-squared:  0.003179,   Adjusted R-squared:  0.003098 
## F-statistic: 39.29 on 10 and 123169 DF,  p-value: < 2.2e-16
#as.data.frame(changeupSpinRateBinYear$coefficients)

changeupRelSpeedBinYear <- lm(RelSpeedDiffYear ~ pitch_group, data = changeupsYear)
summary(changeupRelSpeedBinYear)
## 
## Call:
## lm(formula = RelSpeedDiffYear ~ pitch_group, data = changeupsYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.7409  -0.8689   0.0071   0.8889  12.8830 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       0.170935   0.009849  17.356  < 2e-16 ***
## pitch_group10-19 Pitches         -0.011529   0.014049  -0.821 0.411855    
## pitch_group20-29 Pitches         -0.052886   0.015376  -3.440 0.000583 ***
## pitch_group30-39 Pitches         -0.129105   0.016519  -7.815 5.52e-15 ***
## pitch_group40-49 Pitches         -0.254857   0.017464 -14.594  < 2e-16 ***
## pitch_group50-59 Pitches         -0.339838   0.019126 -17.769  < 2e-16 ***
## pitch_group60-69 Pitches         -0.494720   0.020887 -23.686  < 2e-16 ***
## pitch_group70-79 Pitches         -0.577324   0.022959 -25.146  < 2e-16 ***
## pitch_group80-89 Pitches         -0.645018   0.026410 -24.423  < 2e-16 ***
## pitch_group90-99 Pitches         -0.733432   0.034956 -20.981  < 2e-16 ***
## pitch_groupMore Than 100 Pitches -0.729123   0.051964 -14.031  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.574 on 123582 degrees of freedom
##   (206 observations deleted due to missingness)
## Multiple R-squared:  0.01806,    Adjusted R-squared:  0.01798 
## F-statistic: 227.3 on 10 and 123582 DF,  p-value: < 2.2e-16
#as.data.frame(changeupRelSpeedBinYear$coefficients)

changeupVertBreakBinYear <- lm(VertBreakDiffYear ~ pitch_group, data = changeupsYear)
summary(changeupVertBreakBinYear)
## 
## Call:
## lm(formula = VertBreakDiffYear ~ pitch_group, data = changeupsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.715  -2.195   0.030   2.282  21.855 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      -0.10182    0.02432  -4.186 2.84e-05 ***
## pitch_group10-19 Pitches          0.02999    0.03469   0.865 0.387293    
## pitch_group20-29 Pitches          0.09407    0.03797   2.478 0.013227 *  
## pitch_group30-39 Pitches          0.14168    0.04080   3.473 0.000515 ***
## pitch_group40-49 Pitches          0.14364    0.04311   3.332 0.000863 ***
## pitch_group50-59 Pitches          0.26702    0.04725   5.651 1.60e-08 ***
## pitch_group60-69 Pitches          0.11880    0.05157   2.304 0.021247 *  
## pitch_group70-79 Pitches          0.30002    0.05667   5.294 1.20e-07 ***
## pitch_group80-89 Pitches          0.23491    0.06518   3.604 0.000314 ***
## pitch_group90-99 Pitches          0.21402    0.08629   2.480 0.013128 *  
## pitch_groupMore Than 100 Pitches -0.03948    0.12851  -0.307 0.758714    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.881 on 123210 degrees of freedom
##   (578 observations deleted due to missingness)
## Multiple R-squared:  0.0005564,  Adjusted R-squared:  0.0004752 
## F-statistic: 6.859 on 10 and 123210 DF,  p-value: 8.367e-11
#as.data.frame(changeupVertBreakBinYear$coefficients)

changeupHorzBreakBinYear <- lm(HorzBreakDiffYear ~ pitch_group, data = changeupsYear)
summary(changeupHorzBreakBinYear)
## 
## Call:
## lm(formula = HorzBreakDiffYear ~ pitch_group, data = changeupsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.890  -9.010  -0.349   9.083  33.684 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      -0.01578    0.06912  -0.228    0.819  
## pitch_group10-19 Pitches         -0.07335    0.09861  -0.744    0.457  
## pitch_group20-29 Pitches          0.04296    0.10792   0.398    0.691  
## pitch_group30-39 Pitches          0.17918    0.11597   1.545    0.122  
## pitch_group40-49 Pitches          0.08605    0.12254   0.702    0.483  
## pitch_group50-59 Pitches         -0.14951    0.13440  -1.112    0.266  
## pitch_group60-69 Pitches         -0.05242    0.14670  -0.357    0.721  
## pitch_group70-79 Pitches         -0.08538    0.16098  -0.530    0.596  
## pitch_group80-89 Pitches          0.14784    0.18525   0.798    0.425  
## pitch_group90-99 Pitches          0.22619    0.24561   0.921    0.357  
## pitch_groupMore Than 100 Pitches  0.62245    0.36604   1.700    0.089 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.99 on 122238 degrees of freedom
##   (1550 observations deleted due to missingness)
## Multiple R-squared:  0.0001013,  Adjusted R-squared:  1.947e-05 
## F-statistic: 1.238 on 10 and 122238 DF,  p-value: 0.2604
#as.data.frame(changeupHorzBreakBinYear$coefficients)

Curveballs

curveballSpinRateBinYear <- lm(SpinRateDiffYear ~ pitch_group, data = curveballsYear)
summary(curveballSpinRateBinYear)
## 
## Call:
## lm(formula = SpinRateDiffYear ~ pitch_group, data = curveballsYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1562.89   -57.98     6.74    70.82  1951.94 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        1.13908    0.95270   1.196 0.231840    
## pitch_group10-19 Pitches           5.02238    1.37771   3.645 0.000267 ***
## pitch_group20-29 Pitches           1.92152    1.51975   1.264 0.206102    
## pitch_group30-39 Pitches           0.01574    1.67678   0.009 0.992510    
## pitch_group40-49 Pitches          -2.34638    1.83787  -1.277 0.201718    
## pitch_group50-59 Pitches          -8.46786    1.99723  -4.240 2.24e-05 ***
## pitch_group60-69 Pitches          -9.88622    2.19267  -4.509 6.53e-06 ***
## pitch_group70-79 Pitches         -10.46660    2.41838  -4.328 1.51e-05 ***
## pitch_group80-89 Pitches         -15.27392    2.76171  -5.531 3.20e-08 ***
## pitch_group90-99 Pitches          -9.76875    3.57655  -2.731 0.006309 ** 
## pitch_groupMore Than 100 Pitches -11.42645    4.93958  -2.313 0.020712 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 134.8 on 87513 degrees of freedom
##   (1013 observations deleted due to missingness)
## Multiple R-squared:  0.00163,    Adjusted R-squared:  0.001516 
## F-statistic: 14.29 on 10 and 87513 DF,  p-value: < 2.2e-16
#as.data.frame(curveballSpinRateBinYear$coefficients)

curveballRelSpeedBinYear <- lm(RelSpeedDiffYear ~ pitch_group, data = curveballsYear)
summary(curveballRelSpeedBinYear)
## 
## Call:
## lm(formula = RelSpeedDiffYear ~ pitch_group, data = curveballsYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0839  -0.9741  -0.0455   0.9101  20.0234 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       0.113009   0.011614   9.731  < 2e-16 ***
## pitch_group10-19 Pitches          0.004884   0.016792   0.291    0.771    
## pitch_group20-29 Pitches         -0.008058   0.018520  -0.435    0.663    
## pitch_group30-39 Pitches         -0.116465   0.020429  -5.701 1.19e-08 ***
## pitch_group40-49 Pitches         -0.155680   0.022403  -6.949 3.70e-12 ***
## pitch_group50-59 Pitches         -0.284222   0.024328 -11.683  < 2e-16 ***
## pitch_group60-69 Pitches         -0.380180   0.026727 -14.225  < 2e-16 ***
## pitch_group70-79 Pitches         -0.441171   0.029470 -14.970  < 2e-16 ***
## pitch_group80-89 Pitches         -0.528695   0.033677 -15.699  < 2e-16 ***
## pitch_group90-99 Pitches         -0.496301   0.043644 -11.372  < 2e-16 ***
## pitch_groupMore Than 100 Pitches -0.459348   0.060114  -7.641 2.17e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.65 on 88282 degrees of freedom
##   (244 observations deleted due to missingness)
## Multiple R-squared:  0.009777,   Adjusted R-squared:  0.009665 
## F-statistic: 87.17 on 10 and 88282 DF,  p-value: < 2.2e-16
#as.data.frame(curveballRelSpeedBinYear$coefficients)

curveballVertBreakBinYear <- lm(VertBreakDiffYear ~ pitch_group, data = curveballsYear)
summary(curveballVertBreakBinYear)
## 
## Call:
## lm(formula = VertBreakDiffYear ~ pitch_group, data = curveballsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.314  -2.232  -0.176   1.873  39.778 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      -0.11697    0.02729  -4.286 1.82e-05 ***
## pitch_group10-19 Pitches          0.11213    0.03944   2.843 0.004470 ** 
## pitch_group20-29 Pitches          0.14229    0.04350   3.271 0.001072 ** 
## pitch_group30-39 Pitches          0.16648    0.04798   3.470 0.000521 ***
## pitch_group40-49 Pitches          0.18215    0.05258   3.464 0.000532 ***
## pitch_group50-59 Pitches          0.16197    0.05710   2.837 0.004558 ** 
## pitch_group60-69 Pitches          0.19078    0.06274   3.041 0.002360 ** 
## pitch_group70-79 Pitches          0.18386    0.06910   2.661 0.007801 ** 
## pitch_group80-89 Pitches          0.18032    0.07900   2.282 0.022464 *  
## pitch_group90-99 Pitches          0.22444    0.10231   2.194 0.028260 *  
## pitch_groupMore Than 100 Pitches  0.05407    0.14082   0.384 0.701010    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.866 on 87878 degrees of freedom
##   (648 observations deleted due to missingness)
## Multiple R-squared:  0.0003229,  Adjusted R-squared:  0.0002091 
## F-statistic: 2.838 on 10 and 87878 DF,  p-value: 0.00157
#as.data.frame(curveballVertBreakBinYear$coefficients)


curveballHorzBreakBinYear <- lm(HorzBreakDiffYear ~ pitch_group, data = curveballsYear)
summary(curveballHorzBreakBinYear)
## 
## Call:
## lm(formula = HorzBreakDiffYear ~ pitch_group, data = curveballsYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.697  -8.961  -0.419   8.985  34.121 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)                       0.001758   0.077378   0.023    0.982
## pitch_group10-19 Pitches         -0.043857   0.111885  -0.392    0.695
## pitch_group20-29 Pitches          0.056363   0.123429   0.457    0.648
## pitch_group30-39 Pitches          0.015103   0.136132   0.111    0.912
## pitch_group40-49 Pitches         -0.118843   0.149226  -0.796    0.426
## pitch_group50-59 Pitches          0.123086   0.161908   0.760    0.447
## pitch_group60-69 Pitches         -0.175070   0.178197  -0.982    0.326
## pitch_group70-79 Pitches          0.072103   0.196116   0.368    0.713
## pitch_group80-89 Pitches          0.145017   0.224120   0.647    0.518
## pitch_group90-99 Pitches          0.049306   0.291437   0.169    0.866
## pitch_groupMore Than 100 Pitches  0.002830   0.399660   0.007    0.994
## 
## Residual standard error: 10.94 on 87444 degrees of freedom
##   (1082 observations deleted due to missingness)
## Multiple R-squared:  4.718e-05,  Adjusted R-squared:  -6.717e-05 
## F-statistic: 0.4126 on 10 and 87444 DF,  p-value: 0.9415
#as.data.frame(curveballHorzBreakBinYear$coefficients)

Cutters

cutterSpinRateBinYear <- lm(SpinRateDiffYear ~ pitch_group, data = cuttersYear)
summary(cutterSpinRateBinYear)
## 
## Call:
## lm(formula = SpinRateDiffYear ~ pitch_group, data = cuttersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1276.82   -53.80     3.38    61.36  1365.15 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        5.7401     2.0225   2.838 0.004544 ** 
## pitch_group10-19 Pitches          -2.4351     2.9862  -0.815 0.414828    
## pitch_group20-29 Pitches           0.1838     3.2922   0.056 0.955469    
## pitch_group30-39 Pitches          -3.7480     3.6487  -1.027 0.304344    
## pitch_group40-49 Pitches         -10.3194     4.1234  -2.503 0.012339 *  
## pitch_group50-59 Pitches         -20.2399     4.4892  -4.509 6.58e-06 ***
## pitch_group60-69 Pitches         -17.8565     5.0607  -3.528 0.000419 ***
## pitch_group70-79 Pitches         -22.8073     5.4923  -4.153 3.31e-05 ***
## pitch_group80-89 Pitches         -12.5231     6.2138  -2.015 0.043885 *  
## pitch_group90-99 Pitches         -26.5169     7.5197  -3.526 0.000423 ***
## pitch_groupMore Than 100 Pitches -18.6500    10.8137  -1.725 0.084610 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 115.9 on 13279 degrees of freedom
##   (110 observations deleted due to missingness)
## Multiple R-squared:  0.004475,   Adjusted R-squared:  0.003726 
## F-statistic: 5.969 on 10 and 13279 DF,  p-value: 4.355e-09
#as.data.frame(cutterSpinRateBinYear$coefficient)

cutterRelSpeedBinYear <- lm(RelSpeedDiffYear ~ pitch_group, data = cuttersYear)
summary(cutterRelSpeedBinYear)
## 
## Call:
## lm(formula = RelSpeedDiffYear ~ pitch_group, data = cuttersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.2597  -0.8471   0.0108   0.8661  11.4139 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       0.20652    0.02642   7.818 5.79e-15 ***
## pitch_group10-19 Pitches         -0.10157    0.03901  -2.603  0.00924 ** 
## pitch_group20-29 Pitches         -0.17158    0.04301  -3.989 6.66e-05 ***
## pitch_group30-39 Pitches         -0.26536    0.04770  -5.563 2.70e-08 ***
## pitch_group40-49 Pitches         -0.33290    0.05386  -6.181 6.56e-10 ***
## pitch_group50-59 Pitches         -0.41626    0.05865  -7.097 1.34e-12 ***
## pitch_group60-69 Pitches         -0.39435    0.06611  -5.965 2.51e-09 ***
## pitch_group70-79 Pitches         -0.60866    0.07165  -8.495  < 2e-16 ***
## pitch_group80-89 Pitches         -0.68727    0.08113  -8.472  < 2e-16 ***
## pitch_group90-99 Pitches         -0.67949    0.09799  -6.934 4.28e-12 ***
## pitch_groupMore Than 100 Pitches -0.31865    0.14170  -2.249  0.02455 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.519 on 13362 degrees of freedom
##   (27 observations deleted due to missingness)
## Multiple R-squared:  0.01563,    Adjusted R-squared:  0.01489 
## F-statistic: 21.21 on 10 and 13362 DF,  p-value: < 2.2e-16
#as.data.frame(cutterRelSpeedBinYear$coefficient)

cutterVertBreakBinYear <- lm(VertBreakDiffYear ~ pitch_group, data = cuttersYear)
summary(cutterVertBreakBinYear)
## 
## Call:
## lm(formula = VertBreakDiffYear ~ pitch_group, data = cuttersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.7816  -1.8442  -0.0694   1.7192  21.9358 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      -0.03930    0.05602  -0.702   0.4830  
## pitch_group10-19 Pitches         -0.01649    0.08270  -0.199   0.8420  
## pitch_group20-29 Pitches         -0.07245    0.09122  -0.794   0.4271  
## pitch_group30-39 Pitches          0.02628    0.10113   0.260   0.7950  
## pitch_group40-49 Pitches          0.19637    0.11416   1.720   0.0854 .
## pitch_group50-59 Pitches          0.23781    0.12427   1.914   0.0557 .
## pitch_group60-69 Pitches          0.20182    0.14006   1.441   0.1496  
## pitch_group70-79 Pitches         -0.03363    0.15179  -0.222   0.8246  
## pitch_group80-89 Pitches          0.01865    0.17187   0.109   0.9136  
## pitch_group90-99 Pitches          0.50875    0.20796   2.446   0.0144 *
## pitch_groupMore Than 100 Pitches  0.20207    0.30017   0.673   0.5008  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.217 on 13343 degrees of freedom
##   (46 observations deleted due to missingness)
## Multiple R-squared:  0.001284,   Adjusted R-squared:  0.0005351 
## F-statistic: 1.715 on 10 and 13343 DF,  p-value: 0.07127
#as.data.frame(cutterVertBreakBinYear$coefficient)

cutterHorzBreakBinYear <- lm(HorzBreakDiffYear ~ pitch_group, data = cuttersYear)
summary(cutterHorzBreakBinYear)
## 
## Call:
## lm(formula = HorzBreakDiffYear ~ pitch_group, data = cuttersYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.727  -8.952  -1.156   9.130  33.587 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                       0.11248    0.19224   0.585  0.55849   
## pitch_group10-19 Pitches         -0.15201    0.28406  -0.535  0.59255   
## pitch_group20-29 Pitches          0.01521    0.31332   0.049  0.96129   
## pitch_group30-39 Pitches          0.22605    0.34802   0.650  0.51601   
## pitch_group40-49 Pitches         -0.03972    0.39237  -0.101  0.91937   
## pitch_group50-59 Pitches         -0.04007    0.42810  -0.094  0.92542   
## pitch_group60-69 Pitches         -0.46202    0.48122  -0.960  0.33702   
## pitch_group70-79 Pitches         -0.13910    0.52492  -0.265  0.79102   
## pitch_group80-89 Pitches         -1.56235    0.59431  -2.629  0.00858 **
## pitch_group90-99 Pitches         -0.81362    0.72166  -1.127  0.25958   
## pitch_groupMore Than 100 Pitches -1.57748    1.03253  -1.528  0.12659   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.02 on 13233 degrees of freedom
##   (156 observations deleted due to missingness)
## Multiple R-squared:  0.0009412,  Adjusted R-squared:  0.0001862 
## F-statistic: 1.247 on 10 and 13233 DF,  p-value: 0.2552
#as.data.frame(cutterHorzBreakBinYear$coefficient)

Sliders

sliderSpinRateBinYear <- lm(SpinRateDiffYear ~ pitch_group, data = slidersYear)
summary(sliderSpinRateBinYear)
## 
## Call:
## lm(formula = SpinRateDiffYear ~ pitch_group, data = slidersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1919.61   -58.25    10.39    77.50  2297.09 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        3.1797     0.6708   4.740 2.14e-06 ***
## pitch_group10-19 Pitches           2.3261     0.9830   2.366   0.0180 *  
## pitch_group20-29 Pitches           0.9530     1.0943   0.871   0.3838    
## pitch_group30-39 Pitches          -5.2996     1.2216  -4.338 1.44e-05 ***
## pitch_group40-49 Pitches          -8.2343     1.3638  -6.038 1.56e-09 ***
## pitch_group50-59 Pitches         -10.7516     1.5018  -7.159 8.15e-13 ***
## pitch_group60-69 Pitches         -12.8987     1.6581  -7.779 7.34e-15 ***
## pitch_group70-79 Pitches         -15.9171     1.8620  -8.548  < 2e-16 ***
## pitch_group80-89 Pitches         -13.7920     2.1527  -6.407 1.49e-10 ***
## pitch_group90-99 Pitches         -19.6681     2.8346  -6.939 3.97e-12 ***
## pitch_groupMore Than 100 Pitches  -7.6781     4.0257  -1.907   0.0565 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 153.2 on 209255 degrees of freedom
##   (4771 observations deleted due to missingness)
## Multiple R-squared:  0.001513,   Adjusted R-squared:  0.001465 
## F-statistic: 31.71 on 10 and 209255 DF,  p-value: < 2.2e-16
#as.data.frame(sliderSpinRateBinYear$coefficient)

sliderRelSpeedBinYear <- lm(RelSpeedDiffYear ~ pitch_group, data = slidersYear)
summary(sliderRelSpeedBinYear)
## 
## Call:
## lm(formula = RelSpeedDiffYear ~ pitch_group, data = slidersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.5091  -0.9942  -0.0013   0.9925  16.4960 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       0.140549   0.007593  18.509  < 2e-16 ***
## pitch_group10-19 Pitches         -0.007899   0.011129  -0.710    0.478    
## pitch_group20-29 Pitches         -0.083878   0.012392  -6.769  1.3e-11 ***
## pitch_group30-39 Pitches         -0.154730   0.013829 -11.189  < 2e-16 ***
## pitch_group40-49 Pitches         -0.245708   0.015445 -15.908  < 2e-16 ***
## pitch_group50-59 Pitches         -0.302369   0.017004 -17.782  < 2e-16 ***
## pitch_group60-69 Pitches         -0.471239   0.018803 -25.062  < 2e-16 ***
## pitch_group70-79 Pitches         -0.517617   0.021109 -24.521  < 2e-16 ***
## pitch_group80-89 Pitches         -0.621009   0.024365 -25.487  < 2e-16 ***
## pitch_group90-99 Pitches         -0.605815   0.032147 -18.845  < 2e-16 ***
## pitch_groupMore Than 100 Pitches -0.506722   0.045360 -11.171  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.752 on 213521 degrees of freedom
##   (505 observations deleted due to missingness)
## Multiple R-squared:  0.01053,    Adjusted R-squared:  0.01049 
## F-statistic: 227.3 on 10 and 213521 DF,  p-value: < 2.2e-16
#as.data.frame(sliderRelSpeedBinYear$coefficient)

sliderVertBreakBinYear <- lm(VertBreakDiffYear ~ pitch_group, data = slidersYear)
summary(sliderVertBreakBinYear)
## 
## Call:
## lm(formula = VertBreakDiffYear ~ pitch_group, data = slidersYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.285  -2.178  -0.038   2.083  56.125 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      -0.0410648  0.0167616  -2.450 0.014289 *  
## pitch_group10-19 Pitches          0.0495680  0.0245674   2.018 0.043631 *  
## pitch_group20-29 Pitches          0.0269975  0.0273479   0.987 0.323553    
## pitch_group30-39 Pitches          0.1084424  0.0305254   3.553 0.000382 ***
## pitch_group40-49 Pitches          0.0484361  0.0340953   1.421 0.155431    
## pitch_group50-59 Pitches          0.0322589  0.0375425   0.859 0.390197    
## pitch_group60-69 Pitches          0.0391902  0.0415190   0.944 0.345216    
## pitch_group70-79 Pitches          0.1816247  0.0466087   3.897 9.75e-05 ***
## pitch_group80-89 Pitches          0.0008113  0.0537635   0.015 0.987961    
## pitch_group90-99 Pitches         -0.0380980  0.0709716  -0.537 0.591402    
## pitch_groupMore Than 100 Pitches  0.0889536  0.1002269   0.888 0.374798    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.86 on 212698 degrees of freedom
##   (1328 observations deleted due to missingness)
## Multiple R-squared:  0.000124,   Adjusted R-squared:  7.699e-05 
## F-statistic: 2.638 on 10 and 212698 DF,  p-value: 0.003266
#as.data.frame(sliderVertBreakBinYear$coefficient)


sliderHorzBreakBinYear <- lm(HorzBreakDiffYear ~ pitch_group, data = slidersYear)
summary(sliderHorzBreakBinYear)
## 
## Call:
## lm(formula = HorzBreakDiffYear ~ pitch_group, data = slidersYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.437  -9.242  -0.921   9.320  38.222 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                       0.05040    0.04844   1.041  0.29804   
## pitch_group10-19 Pitches         -0.19822    0.07104  -2.790  0.00526 **
## pitch_group20-29 Pitches         -0.01725    0.07911  -0.218  0.82739   
## pitch_group30-39 Pitches          0.02523    0.08832   0.286  0.77512   
## pitch_group40-49 Pitches         -0.11963    0.09860  -1.213  0.22499   
## pitch_group50-59 Pitches          0.02370    0.10860   0.218  0.82723   
## pitch_group60-69 Pitches         -0.07735    0.12032  -0.643  0.52031   
## pitch_group70-79 Pitches          0.04665    0.13505   0.345  0.72979   
## pitch_group80-89 Pitches          0.08510    0.15584   0.546  0.58502   
## pitch_group90-99 Pitches          0.09221    0.20635   0.447  0.65498   
## pitch_groupMore Than 100 Pitches -0.14450    0.29100  -0.497  0.61950   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.13 on 211257 degrees of freedom
##   (2769 observations deleted due to missingness)
## Multiple R-squared:  6.55e-05,   Adjusted R-squared:  1.817e-05 
## F-statistic: 1.384 on 10 and 211257 DF,  p-value: 0.1805
#as.data.frame(sliderHorzBreakBinYear$coefficient)

Sinkers

sinkersSpinRateBinYear <- lm(SpinRateDiffYear ~ pitch_group, data = sinkersYear)
summary(sinkersSpinRateBinYear)
## 
## Call:
## lm(formula = SpinRateDiffYear ~ pitch_group, data = sinkersYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -989.87  -52.80    1.22   56.76  710.06 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                        1.219078   1.407269   0.866  0.38635   
## pitch_group10-19 Pitches           4.794218   2.140123   2.240  0.02509 * 
## pitch_group20-29 Pitches          -0.360638   2.326097  -0.155  0.87679   
## pitch_group30-39 Pitches          -1.471929   2.671352  -0.551  0.58164   
## pitch_group40-49 Pitches          -4.527639   3.007504  -1.505  0.13223   
## pitch_group50-59 Pitches          -6.829206   3.340819  -2.044  0.04095 * 
## pitch_group60-69 Pitches         -11.422726   3.607824  -3.166  0.00155 **
## pitch_group70-79 Pitches         -12.984389   4.004572  -3.242  0.00119 **
## pitch_group80-89 Pitches          -0.003152   4.488724  -0.001  0.99944   
## pitch_group90-99 Pitches         -12.918962   5.539613  -2.332  0.01971 * 
## pitch_groupMore Than 100 Pitches   2.152943   8.189170   0.263  0.79263   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 97.48 on 17972 degrees of freedom
##   (62 observations deleted due to missingness)
## Multiple R-squared:  0.002452,   Adjusted R-squared:  0.001897 
## F-statistic: 4.417 on 10 and 17972 DF,  p-value: 3.124e-06
sinkersRelSpeedBinYear <- lm(RelSpeedDiffYear ~ pitch_group, data = sinkersYear)
summary(sinkersRelSpeedBinYear)
## 
## Call:
## lm(formula = RelSpeedDiffYear ~ pitch_group, data = sinkersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.9628  -0.6982   0.0325   0.7699   7.7123 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       0.34539    0.01891  18.266  < 2e-16 ***
## pitch_group10-19 Pitches         -0.21119    0.02874  -7.349 2.08e-13 ***
## pitch_group20-29 Pitches         -0.28972    0.03125  -9.271  < 2e-16 ***
## pitch_group30-39 Pitches         -0.48009    0.03589 -13.376  < 2e-16 ***
## pitch_group40-49 Pitches         -0.58259    0.04039 -14.423  < 2e-16 ***
## pitch_group50-59 Pitches         -0.72496    0.04490 -16.147  < 2e-16 ***
## pitch_group60-69 Pitches         -0.83242    0.04846 -17.177  < 2e-16 ***
## pitch_group70-79 Pitches         -0.91636    0.05375 -17.049  < 2e-16 ***
## pitch_group80-89 Pitches         -0.85191    0.06027 -14.134  < 2e-16 ***
## pitch_group90-99 Pitches         -0.93975    0.07445 -12.623  < 2e-16 ***
## pitch_groupMore Than 100 Pitches -0.85333    0.11006  -7.754 9.41e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.31 on 17993 degrees of freedom
##   (41 observations deleted due to missingness)
## Multiple R-squared:  0.04985,    Adjusted R-squared:  0.04932 
## F-statistic:  94.4 on 10 and 17993 DF,  p-value: < 2.2e-16
sinkersVertBreakBinYear <- lm(VertBreakDiffYear ~ pitch_group, data = sinkersYear)
summary(sinkersVertBreakBinYear)
## 
## Call:
## lm(formula = VertBreakDiffYear ~ pitch_group, data = sinkersYear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.5546  -1.6612   0.0173   1.6575  17.4043 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      -0.032401   0.040722  -0.796   0.4262  
## pitch_group10-19 Pitches          0.087538   0.061873   1.415   0.1571  
## pitch_group20-29 Pitches          0.058048   0.067317   0.862   0.3885  
## pitch_group30-39 Pitches         -0.041624   0.077289  -0.539   0.5902  
## pitch_group40-49 Pitches         -0.012714   0.086989  -0.146   0.8838  
## pitch_group50-59 Pitches         -0.008553   0.096734  -0.088   0.9295  
## pitch_group60-69 Pitches          0.070180   0.104330   0.673   0.5012  
## pitch_group70-79 Pitches         -0.055414   0.115709  -0.479   0.6320  
## pitch_group80-89 Pitches          0.094518   0.129866   0.728   0.4667  
## pitch_group90-99 Pitches          0.366821   0.160268   2.289   0.0221 *
## pitch_groupMore Than 100 Pitches  0.070593   0.236922   0.298   0.7657  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.82 on 17980 degrees of freedom
##   (54 observations deleted due to missingness)
## Multiple R-squared:  0.0005471,  Adjusted R-squared:  -8.771e-06 
## F-statistic: 0.9842 on 10 and 17980 DF,  p-value: 0.4545
sinkersHorzBreakBinYear <- lm(HorzBreakDiffYear ~ pitch_group, data = sinkersYear)
summary(sinkersHorzBreakBinYear)
## 
## Call:
## lm(formula = HorzBreakDiffYear ~ pitch_group, data = sinkersYear)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.581  -8.800  -0.734   8.828  41.458 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                       0.07817    0.15745   0.496   0.6196  
## pitch_group10-19 Pitches         -0.14002    0.23928  -0.585   0.5584  
## pitch_group20-29 Pitches         -0.13743    0.26034  -0.528   0.5976  
## pitch_group30-39 Pitches          0.01033    0.29893   0.035   0.9724  
## pitch_group40-49 Pitches         -0.25693    0.33676  -0.763   0.4455  
## pitch_group50-59 Pitches          0.23568    0.37500   0.628   0.5297  
## pitch_group60-69 Pitches         -0.54405    0.40475  -1.344   0.1789  
## pitch_group70-79 Pitches         -0.50730    0.45096  -1.125   0.2606  
## pitch_group80-89 Pitches         -0.36109    0.50530  -0.715   0.4749  
## pitch_group90-99 Pitches          0.85636    0.62559   1.369   0.1711  
## pitch_groupMore Than 100 Pitches  1.95723    0.91587   2.137   0.0326 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.86 on 17800 degrees of freedom
##   (234 observations deleted due to missingness)
## Multiple R-squared:  0.0006843,  Adjusted R-squared:  0.0001229 
## F-statistic: 1.219 on 10 and 17800 DF,  p-value: 0.2727

##Visualizing Regression Coefficients

library(jtools)
plot_summs(fastballSpinRateBinYear, changeupSpinRateBinYear, curveballSpinRateBinYear, cutterSpinRateBinYear, sliderSpinRateBinYear, model.names = c('Fastball', 'ChangeUp', 'Curveball', 'Cutter', 'Slider'), omit.coefs = c('(Intercept)', 'pitchBin9.+. 100+'))
## Registered S3 methods overwritten by 'broom':
##   method            from  
##   tidy.glht         jtools
##   tidy.summary.glht jtools
## Loading required namespace: broom.mixed

Release Speed Summary Graph

plot_summs(fastballRelSpeedBinYear, changeupRelSpeedBinYear, curveballRelSpeedBinYear, cutterRelSpeedBinYear, sliderRelSpeedBinYear, model.names = c('Fastball', 'ChangeUp', 'Curveball', 'Cutter', 'Slider'), omit.coefs = c('(Intercept)', 'pitchBin9.+. 100+'))

VErt Break

plot_summs(fastballVertBreakBinYear, changeupVertBreakBinYear, curveballVertBreakBinYear, cutterVertBreakBinYear, sliderVertBreakBinYear, model.names = c('Fastball', 'ChangeUp', 'Curveball', 'Cutter', 'Slider'), omit.coefs = c('(Intercept)', 'pitchBin9.+. 100+'))

Horz Break

plot_summs(fastballHorzBreakBinYear, changeupHorzBreakBinYear, curveballHorzBreakBinYear, cutterHorzBreakBinYear, sliderHorzBreakBinYear, model.names = c('Fastball', 'ChangeUp', 'Curveball', 'Cutter', 'Slider'), omit.coefs = c('(Intercept)', 'pitchBin9.+. 100+'))

Regressing against WHIP and FIP

Add column for ruesult of play

data1ac <- sqldf("select 
               CASE WHEN PitchCall = 'InPlay' THEN TaggedHitType
               WHEN PitchCall = 'BallCalled' AND KorBB = 'Walk' THEN KorBB 
               WHEN PitchCall = 'StrikeSwinging' AND KorBB = 'Strikeout' THEN 'SwingingStikeout'
               WHEN PitchCall = 'StrikeCalled' AND KorBB = 'Strikeout' THEN 'StrikeoutLooking'
               ELSE PitchCall END AS Result
               FROM pitches" )
pitches['Result'] = data1ac['Result']

THe . in the column name is causing problems. Let’s fix that

pitches$TopOrBottom <- pitches$Top.Bottom

I only want one row for each plate appearance. THe last one that shows how it ended.(With a hit, walk, home run, strikeout etc.) I can’t think of many ways to do this other than another fat for loop. If its ordered by date I’ll loop through it and compare each batterid to the next batter id. if they are different then I add that row to my new dataset

intermediate_step <- sqldf("SELECT PitcherId, BatterTeam, GameID, PitcherTeam, Date, Inning, TopOrBottom, PAofInning, BatterId, Result, PlayResult, KorBB, pitch_count - PitchofPA + 1 AS CountatFirstPitch, PitchofPA from pitches ORDER BY GameID, BatterTeam, Inning, PAofInning ")


length(intermediate_step$BatterId)
## [1] 1144455
#Well run into problems with the occasional batter without a BAtter ID (NA)
#Therefore, I'm going to replace the NA batterIds it 0000 or something

#First lets see how many there are
mean(is.na(intermediate_step$BatterId))
## [1] 0.0002621335
#looks like .026 percent. Thats not bad. It isnt a problem unless theres 2 consecuive batters with no id

#I'm going to make NA BatterIds equal 999999
intermediate_step$BatterId <- intermediate_step$BatterId %>% replace(is.na(.), 999999)


last_pitches = intermediate_step %>%
  group_by(GameID, Inning, BatterId, PAofInning) %>%
  slice_tail(n=1) %>%
  arrange(GameID, Inning, PitcherTeam, PAofInning)


dim(last_pitches)
## [1] 300503     14
#Neew code chucnk just to take a look at last_pitches
head(last_pitches, 300)
ABCDEFGHIJ0123456789
PitcherId
<dbl>
BatterTeam
<chr>
GameID
<chr>
PitcherTeam
<chr>
Date
<chr>
Inning
<int>
TopOrBottom
<chr>
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
1000017008LON_DIR20190223-BlairField-1NEV_WOL2019-02-231Bottom
1000017008LON_DIR20190223-BlairField-1NEV_WOL2019-02-231Bottom
1000017008LON_DIR20190223-BlairField-1NEV_WOL2019-02-231Bottom
# Add a new factor column to the dataframe for the pitch group
last_pitches$first_pitch_bin <- as.factor(ifelse(last_pitches$CountatFirstPitch < 100, (last_pitches$CountatFirstPitch) %/% 10 + 1, 11))

# Check the updated dataframe
head(last_pitches, 250)
ABCDEFGHIJ0123456789
PitcherId
<dbl>
BatterTeam
<chr>
GameID
<chr>
PitcherTeam
<chr>
Date
<chr>
Inning
<int>
TopOrBottom
<chr>
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
8900656NEV_WOL20190223-BlairField-1LON_DIR2019-02-231Top
1000017008LON_DIR20190223-BlairField-1NEV_WOL2019-02-231Bottom
1000017008LON_DIR20190223-BlairField-1NEV_WOL2019-02-231Bottom
1000017008LON_DIR20190223-BlairField-1NEV_WOL2019-02-231Bottom
#Let make a first_Pitch_group Column

last_pitches$first_pitch_group <- NA

last_pitches$first_pitch_group[last_pitches$first_pitch_bin == '1'] <- '0-9 pitches'
last_pitches$first_pitch_group[last_pitches$first_pitch_bin == '2'] <- '10-19 pitches'
last_pitches$first_pitch_group[last_pitches$first_pitch_bin == '3'] <- '20-29 pitches'
last_pitches$first_pitch_group[last_pitches$first_pitch_bin == '4'] <- '30-39 pitches'
last_pitches$first_pitch_group[last_pitches$first_pitch_bin == '5'] <- '40-49 pitches'
last_pitches$first_pitch_group[last_pitches$first_pitch_bin == '6'] <- '50-59 pitches'
last_pitches$first_pitch_group[last_pitches$first_pitch_bin == '7'] <- '60-69 pitches'
last_pitches$first_pitch_group[last_pitches$first_pitch_bin == '8'] <- '70-79 pitches'
last_pitches$first_pitch_group[last_pitches$first_pitch_bin == '9'] <- '80-89 pitches'
last_pitches$first_pitch_group[last_pitches$first_pitch_bin == '10'] <- '90-99 pitches'
last_pitches$first_pitch_group[last_pitches$first_pitch_bin == '11'] <- 'More Than 100 Pitches'

#MAke sure the order is correct. Really annoying if regression output isn't in ascending order
sqldf("SELECT first_pitch_group, count(*) from last_pitches GROUP BY first_pitch_group ORDER BY first_pitch_group")
ABCDEFGHIJ0123456789
first_pitch_group
<chr>
count(*)
<int>
NA823
0-9 pitches89373
10-19 pitches58844
20-29 pitches41731
30-39 pitches30461
40-49 pitches23003
50-59 pitches17982
60-69 pitches14511
70-79 pitches10884
80-89 pitches7399

Last Pitches is looking pretty good. I need a way to quanitfy the result of the play. Probably will do a walk column, hit column, total bases column

last_pitches$result2 <- NA
last_pitches$result3 <- NA

last_pitches$result2 <- ifelse(!(is.na(last_pitches$PlayResult)) & last_pitches$PlayResult != 'Undefined', last_pitches$PlayResult, NA)

last_pitches$result2 <- ifelse(!(is.na(last_pitches$KorBB)) & last_pitches$KorBB != 'Undefined', last_pitches$Result, last_pitches$result2)

last_pitches$result2 <- ifelse(!(is.na(last_pitches$Result)) & last_pitches$Result == 'HitByPitch', last_pitches$Result, last_pitches$result2)

last_pitches$result2 <- ifelse(is.na(last_pitches$result2), "###ISSUE###", last_pitches$result2)

last_pitches$result3 <- ifelse(is.na(last_pitches$result2), last_pitches$Result, last_pitches$result3)
sqldf("SELECT result2, count(*) FROM last_pitches GROUP BY result2")
ABCDEFGHIJ0123456789
result2
<chr>
count(*)
<int>
112
###ISSUE###1452
BallIntentional685
BallinDirt415
Double13099
Error4836
FieldersChoice3735
Fielderschoice13
FlyBall2
GroundBall1

Fix the nonsense

newdata <- last_pitches[(last_pitches$result2 %in% c('Double','Error','FieldersChoice', 'Fielderschoice', 'HitByPitch', 'HomeRun', 'Homerun', 'Out', 'Sacrifice', 'Single', 'StrikeoutLooking', 'SwingingStikeout', 'Triple', 'Walk')), ]

newdata1 <- newdata

newdata1$result2[newdata$result2 == 'Fielderschoice'] <- 'FieldersChoice'
newdata1$result2[newdata$result2 == 'Homerun'] <- 'HomeRun'

if it does work let me write to csv real quick

write.csv(newdata1, 'C:\\Users\\Nick\\UCSB Baseball\\NickAllCollegeTrackmanAtBatsNew.csv')
atBats <- read.csv('C:\\Users\\Nick\\UCSB Baseball\\NickAllCollegeTrackmanAtBatsNew.csv')

Now that we have atBats dataframe ready, lets run some regresssions on results based metrics WHIP, FIP

#WHIP and FIP regressions

##WHIP

#Making some dummy columns
library(fastDummies)
lastPitches <- dummy_cols(atBats, select_columns = 'result2')

##WHIP regression

lastPitches$walksPlusHits <- lastPitches$result2_Walk + lastPitches$result2_Single + lastPitches$result2_Double + lastPitches$result2_Triple + lastPitches$result2_HomeRun + lastPitches$result2_HitByPitch
lastPitches$outs <- lastPitches$result2_StrikeoutLooking + lastPitches$result2_SwingingStikeout + lastPitches$result2_FieldersChoice + lastPitches$result2_Out + lastPitches$result2_Sacrifice

Ok lets try to group by firstPitchGroup and combine these dummy columns in a way that calculates opp average, whip, etc

whip_bin_avgs <- sqldf("SELECT first_pitch_group, count(*),
                   cast((sum(cast(walksPlusHits as float)) / sum(cast(outs as float)) * 3.00) as float) AS whip FROM lastPitches group by first_pitch_group")


whip_bin_avgs
ABCDEFGHIJ0123456789
first_pitch_group
<chr>
count(*)
<int>
whip
<dbl>
NA8092.455782
0-9 pitches887141.775129
10-19 pitches582471.730861
20-29 pitches413251.663266
30-39 pitches301631.752321
40-49 pitches228211.726143
50-59 pitches178111.666874
60-69 pitches143841.693023
70-79 pitches107941.669259
80-89 pitches73281.573573

Probably should make another SQL querly like the one bove bt gets a pitchers individual whip so we can see if its higher when the pitch count is higher. Either way I’d need yet another query that groups At Bats in the lastPitches df by both picher and Pitch Count Bin

whip_bin_pitcher_avgs <- sqldf("SELECT first_pitch_group, PitcherId, count(*) as numAtBats,
                   cast((sum(cast(walksPlusHits as float)) / sum(cast(outs as float)) * 3.00) as float) AS whip_bin FROM lastPitches group by first_pitch_group, PitcherId ORDER BY count(*) DESC")



whip_bin_pitcher_avgs_ten_plus <- whip_bin_pitcher_avgs[whip_bin_pitcher_avgs$numAtBats >= 4, ]
whip_pitcher_avgs <- sqldf("SELECT PitcherId, count(*),
                   cast((sum(cast(walksPlusHits as float)) / sum(cast(outs as float)) * 3.00) as float) AS whip_avg FROM lastPitches group by PitcherId ORDER BY count(*) DESC")

whip_pitcher_avgs
ABCDEFGHIJ0123456789
PitcherId
<dbl>
count(*)
<int>
whip_avg
<dbl>
10000491764901.4860681
10000369223951.5944882
10000266103911.9173913
10000851933861.0249110
10000804013731.7816594
10000490783671.8616071
10000103533671.6282051
10000585393491.6986301
10000293263471.0546875
10000516773401.3318966
whipdf <- left_join(whip_bin_pitcher_avgs_ten_plus, whip_pitcher_avgs, on = 'PitcherId')
## Joining, by = "PitcherId"
whipdf$diff <- whipdf$whip_bin - whipdf$whip_avg

Linear regression whip diff against pitch bin

whip_diff_mod <- lm(diff ~ first_pitch_group, data = whipdf, weights = numAtBats)
summary(whip_diff_mod)
## 
## Call:
## lm(formula = diff ~ first_pitch_group, data = whipdf, weights = numAtBats)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -51.320  -2.796  -0.903   1.664  84.547 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                             0.14448    0.01912   7.555 4.35e-14 ***
## first_pitch_group10-19 pitches          0.04997    0.03073   1.626 0.104020    
## first_pitch_group20-29 pitches          0.05781    0.03480   1.661 0.096650 .  
## first_pitch_group30-39 pitches          0.24717    0.03944   6.267 3.75e-10 ***
## first_pitch_group40-49 pitches          0.22669    0.04426   5.122 3.05e-07 ***
## first_pitch_group50-59 pitches          0.15657    0.04929   3.177 0.001493 ** 
## first_pitch_group60-69 pitches          0.25429    0.05391   4.716 2.42e-06 ***
## first_pitch_group70-79 pitches          0.24326    0.06176   3.939 8.22e-05 ***
## first_pitch_group80-89 pitches          0.20015    0.07473   2.678 0.007402 ** 
## first_pitch_group90-99 pitches          0.37981    0.10675   3.558 0.000375 ***
## first_pitch_groupMore Than 100 Pitches  0.29814    0.17317   1.722 0.085148 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.524 on 21553 degrees of freedom
##   (165 observations deleted due to missingness)
## Multiple R-squared:  0.004121,   Adjusted R-squared:  0.003659 
## F-statistic: 8.919 on 10 and 21553 DF,  p-value: 8.359e-15
whip_diff_mod
## 
## Call:
## lm(formula = diff ~ first_pitch_group, data = whipdf, weights = numAtBats)
## 
## Coefficients:
##                            (Intercept)          first_pitch_group10-19 pitches  
##                                0.14448                                 0.04997  
##         first_pitch_group20-29 pitches          first_pitch_group30-39 pitches  
##                                0.05781                                 0.24717  
##         first_pitch_group40-49 pitches          first_pitch_group50-59 pitches  
##                                0.22669                                 0.15657  
##         first_pitch_group60-69 pitches          first_pitch_group70-79 pitches  
##                                0.25429                                 0.24326  
##         first_pitch_group80-89 pitches          first_pitch_group90-99 pitches  
##                                0.20015                                 0.37981  
## first_pitch_groupMore Than 100 Pitches  
##                                0.29814

##Fip regression Fip is just walks, homeruns and strikeouts. Let me google the formuler real quick

lastPitches$BBFIP <- (lastPitches$result2_Walk + lastPitches$result2_HitByPitch) * 3.00
lastPitches$HRFIP <- lastPitches$result2_HomeRun * 13.00
lastPitches$KFIP <- lastPitches$result2_Strikeout * (-2.00)
lastPitches$NUMERATOR <- (lastPitches$BBFIP + lastPitches$HRFIP + lastPitches$KFIP)*3

Ok lets try to group by firstPitchGroup and combine these dummy columns in a way that calculates opp average, whip, etc FIP constant 3.9 is whats being used

fip_bin_avgs <- sqldf("SELECT first_pitch_group, count(*),
                   sum(NUMERATOR)/sum(outs) + 3.90 AS fip FROM lastPitches group by first_pitch_group")


fip_bin_avgs
ABCDEFGHIJ0123456789
first_pitch_group
<chr>
count(*)
<int>
fip
<dbl>
NA8099.770748
0-9 pitches887146.814819
10-19 pitches582476.761921
20-29 pitches413256.558911
30-39 pitches301636.746302
40-49 pitches228216.816579
50-59 pitches178116.583818
60-69 pitches143846.583056
70-79 pitches107946.623991
80-89 pitches73286.584778

Probably should make another SQL querly like the one bove bt gets a pitchers individual whip so we can see if its higher when the pitch count is higher. Maybe ill get that pitches whip with, say, 0-20 pitches and see if theres an increase when count is higher. Either way I’d need yet another query that groups At Bats in the lastPitches df by both picher and Pitch Count Bin

fip_bin_pitcher_avgs <- sqldf("SELECT first_pitch_group, PitcherId, count(*) as numAtBats,
                   sum(NUMERATOR)/sum(outs) + 3.90 AS fip_bin FROM lastPitches group by first_pitch_group, PitcherId ORDER BY count(*) DESC")



fip_bin_pitcher_avgs_ten_plus <- fip_bin_pitcher_avgs[fip_bin_pitcher_avgs$numAtBats >= 4, ]
fip_pitcher_avgs <- sqldf("SELECT PitcherId, count(*),
                   sum(NUMERATOR)/sum(outs) + 3.90 AS fip_avg FROM lastPitches group by PitcherId ORDER BY count(*) DESC")

fip_pitcher_avgs
ABCDEFGHIJ0123456789
PitcherId
<dbl>
count(*)
<int>
fip_avg
<dbl>
10000491764905.841176
10000369223956.073228
10000266103916.039130
10000851933865.746975
10000804013735.498253
10000490783677.315179
10000103533677.810256
10000585393496.571233
10000293263475.575781
10000516773406.848276
fipdf <- left_join(fip_bin_pitcher_avgs_ten_plus, fip_pitcher_avgs, on = 'PitcherId')
## Joining, by = "PitcherId"
fipdf$diff_fip <- fipdf$fip_bin - fipdf$fip_avg

Linear regression whip diff against pitch bin

fip_diff_mod <- lm(diff_fip ~ first_pitch_group, data = fipdf, weights = numAtBats)
summary(fip_diff_mod)
## 
## Call:
## lm(formula = diff_fip ~ first_pitch_group, data = fipdf, weights = numAtBats)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -221.240   -7.973   -2.804    4.303  313.085 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                             0.22021    0.05877   3.747 0.000180 ***
## first_pitch_group10-19 pitches          0.16630    0.09446   1.761 0.078326 .  
## first_pitch_group20-29 pitches          0.05993    0.10694   0.560 0.575229    
## first_pitch_group30-39 pitches          0.63672    0.12121   5.253 1.51e-07 ***
## first_pitch_group40-49 pitches          0.57933    0.13603   4.259 2.06e-05 ***
## first_pitch_group50-59 pitches          0.34574    0.15148   2.282 0.022481 *  
## first_pitch_group60-69 pitches          0.48029    0.16570   2.898 0.003754 ** 
## first_pitch_group70-79 pitches          0.67495    0.18982   3.556 0.000378 ***
## first_pitch_group80-89 pitches          0.67152    0.22966   2.924 0.003460 ** 
## first_pitch_group90-99 pitches          0.89788    0.32810   2.737 0.006213 ** 
## first_pitch_groupMore Than 100 Pitches  1.00923    0.53223   1.896 0.057943 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.98 on 21553 degrees of freedom
##   (165 observations deleted due to missingness)
## Multiple R-squared:  0.002878,   Adjusted R-squared:  0.002416 
## F-statistic: 6.222 on 10 and 21553 DF,  p-value: 1.424e-09
as.data.frame(fip_diff_mod$coefficients)
ABCDEFGHIJ0123456789
 
 
fip_diff_mod$coefficients
<dbl>
(Intercept)0.22021394
first_pitch_group10-19 pitches0.16630244
first_pitch_group20-29 pitches0.05992669
first_pitch_group30-39 pitches0.63671842
first_pitch_group40-49 pitches0.57932858
first_pitch_group50-59 pitches0.34573559
first_pitch_group60-69 pitches0.48028900
first_pitch_group70-79 pitches0.67495153
first_pitch_group80-89 pitches0.67152095
first_pitch_group90-99 pitches0.89788063

#BABIP (Batting Average on BAlls In Play)

lastPitches$BIP <- lastPitches$result2_Triple + lastPitches$result2_Single +lastPitches$result2_Double + lastPitches$result2_HomeRun + lastPitches$result2_FieldersChoice + lastPitches$result2_Out + lastPitches$result2_Sacrifice

lastPitches$hits <- lastPitches$result2_Triple + lastPitches$result2_Single +lastPitches$result2_Double + lastPitches$result2_HomeRun

Get each pitchers BABIP for Abs starting in each bin

babip_bin_pitcher_avgs <- sqldf("SELECT first_pitch_group, PitcherId, count(*) as numAtBats,
                   sum(cast(hits as float))/sum(cast(BIP as float)) AS babip_bin FROM lastPitches group by first_pitch_group, PitcherId ORDER BY count(*) DESC")



babip_bin_pitcher_avgs_ten_plus <- babip_bin_pitcher_avgs[babip_bin_pitcher_avgs$numAtBats >= 4, ]

Get the avg for each pitcher for all their batters faced

babip_pitcher_avgs <- sqldf("SELECT PitcherId, count(*),
                   sum(cast(hits as float))/sum(cast(BIP as float)) AS babip_avg FROM lastPitches group by PitcherId ORDER BY count(*) DESC")

babip_pitcher_avgs
ABCDEFGHIJ0123456789
PitcherId
<dbl>
count(*)
<int>
babip_avg
<dbl>
10000491764900.35474006
10000369223950.33884298
10000266103910.37676056
10000851933860.35121951
10000804013730.38783270
10000490783670.39639640
10000103533670.33613445
10000585393490.36575875
10000293263470.33170732
10000516773400.33160622
babipdf <- left_join(babip_bin_pitcher_avgs_ten_plus, babip_pitcher_avgs, on = 'PitcherId')
## Joining, by = "PitcherId"
babipdf$diff_babip <- babipdf$babip_bin - babipdf$babip_avg

Linear regression whip diff against pitch bin

babip_diff_mod <- lm(diff_babip ~ first_pitch_group, data = babipdf, weights = numAtBats)
summary(babip_diff_mod)
## 
## Call:
## lm(formula = diff_babip ~ first_pitch_group, data = babipdf, 
##     weights = numAtBats)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.08710 -0.38630  0.00311  0.34908  2.52238 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                            -0.0029080  0.0019359  -1.502   0.1331  
## first_pitch_group10-19 pitches          0.0017310  0.0031112   0.556   0.5780  
## first_pitch_group20-29 pitches         -0.0010394  0.0035231  -0.295   0.7680  
## first_pitch_group30-39 pitches          0.0056509  0.0039906   1.416   0.1568  
## first_pitch_group40-49 pitches          0.0067702  0.0044781   1.512   0.1306  
## first_pitch_group50-59 pitches         -0.0007593  0.0049852  -0.152   0.8789  
## first_pitch_group60-69 pitches          0.0089646  0.0054537   1.644   0.1002  
## first_pitch_group70-79 pitches          0.0144982  0.0062474   2.321   0.0203 *
## first_pitch_group80-89 pitches          0.0067045  0.0075672   0.886   0.3756  
## first_pitch_group90-99 pitches          0.0081082  0.0108421   0.748   0.4546  
## first_pitch_groupMore Than 100 Pitches  0.0113297  0.0175016   0.647   0.5174  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5593 on 21607 degrees of freedom
##   (111 observations deleted due to missingness)
## Multiple R-squared:  0.0005639,  Adjusted R-squared:  0.0001014 
## F-statistic: 1.219 on 10 and 21607 DF,  p-value: 0.2725
as.data.frame(babip_diff_mod$coefficients)
ABCDEFGHIJ0123456789
 
 
babip_diff_mod$coefficients
<dbl>
(Intercept)-0.0029080077
first_pitch_group10-19 pitches0.0017310108
first_pitch_group20-29 pitches-0.0010393972
first_pitch_group30-39 pitches0.0056509158
first_pitch_group40-49 pitches0.0067702383
first_pitch_group50-59 pitches-0.0007593253
first_pitch_group60-69 pitches0.0089646357
first_pitch_group70-79 pitches0.0144981573
first_pitch_group80-89 pitches0.0067045477
first_pitch_group90-99 pitches0.0081081554